diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5e45afb1f44d4ae06c55176a76cd57d05b6fd1aa 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,43 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/libcairo-2.dll filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/libpoppler-79.dll filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/libpoppler-cpp-0.dll filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/libstdc++-6.dll filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/pdfinfo.exe filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/pdftocairo.exe filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/bin/pdftohtml.exe filter=lfs diff=lfs merge=lfs -text +lib/poppler-0.68.0/lib/libpoppler.dll.a filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/ambiguous_words.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/classifier_tester.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/cntraining.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/combine_lang_model.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/combine_tessdata.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libarchive-13.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libcairo-2.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libcrypto-3-x64.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libgio-2.0-0.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libglib-2.0-0.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libharfbuzz-0.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libiconv-2.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libicudt72.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libicuin72.dll filter=lfs diff=lfs 
merge=lfs -text +lib/Tesseract-OCR/libicuuc72.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libleptonica-6.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libstdc++-6.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libtesseract-5.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libunistring-5.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/libzstd.dll filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/lstmeval.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/lstmtraining.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/mftraining.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/set_unicharset_properties.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/shapeclustering.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/tessdata/eng.traineddata filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/tessdata/osd.traineddata filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/tessdata/spa.traineddata filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/tesseract.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/text2image.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/unicharset_extractor.exe filter=lfs diff=lfs merge=lfs -text +lib/Tesseract-OCR/wordlist2dawg.exe filter=lfs diff=lfs merge=lfs -text diff --git a/lib/Tesseract-OCR/ambiguous_words.1.html b/lib/Tesseract-OCR/ambiguous_words.1.html new file mode 100644 index 0000000000000000000000000000000000000000..0e0c32c9713c669ac40f8973d1937071b896ab5d --- /dev/null +++ b/lib/Tesseract-OCR/ambiguous_words.1.html @@ -0,0 +1,790 @@ + + + + + +AMBIGUOUS_WORDS(1) + + + + + +
+
+

SYNOPSIS

+
+

ambiguous_words [-l lang] TESSDATADIR WORDLIST AMBIGUOUSFILE

+
+
+
+

DESCRIPTION

+
+

ambiguous_words(1) runs Tesseract in a special mode, and for each word +in the word list, produces a set of words which Tesseract thinks might be +ambiguous with it. TESSDATADIR must be set to the absolute path of +a directory containing tessdata/lang.traineddata.

+
+
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/ambiguous_words.exe b/lib/Tesseract-OCR/ambiguous_words.exe new file mode 100644 index 0000000000000000000000000000000000000000..1a7aebc5ea50ea2db61fc4ba1ca44c03bcaa0bb9 --- /dev/null +++ b/lib/Tesseract-OCR/ambiguous_words.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab179fea004c85e5df83c00a808d9927491592af2a358f225d21cc9f362579f +size 1066496 diff --git a/lib/Tesseract-OCR/classifier_tester.1.html b/lib/Tesseract-OCR/classifier_tester.1.html new file mode 100644 index 0000000000000000000000000000000000000000..a00f961fe8e1f23dab0a28b72d4bb404da86f608 --- /dev/null +++ b/lib/Tesseract-OCR/classifier_tester.1.html @@ -0,0 +1,857 @@ + + + + + +CLASSIFIER_TESTER(1) + + + + + +
+
+

NAME

+
+

classifier_tester - for legacy tesseract engine.

+
+
+
+

SYNOPSIS

+
+

classifier_tester -U unicharset_file -F font_properties_file -X xheights_file -classifier x -lang lang [-output_trainer trainer] *.tr

+
+
+
+

DESCRIPTION

+
+

classifier_tester(1) runs Tesseract in a special mode. +It takes a list of .tr files and tests a character classifier +on data as formatted for training, +but it doesn’t have to be the same as the training data.

+
+
+
+

IN/OUT ARGUMENTS

+
+

a list of .tr files

+
+
+
+

OPTIONS

+
+
+
+-l lang +
+
+

+ (Input) three character language code; default value eng. +

+
+
+-classifier x +
+
+

+ (Input) One of "pruner", "full". +

+
+
+-U unicharset +
+
+

+ (Input) The unicharset for the language. +

+
+
+-F font_properties_file +
+
+

+ (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: +

+
+
+
*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*
+
+
+
+-X xheights_file +
+
+

+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] +

+
+
+
*font_name* *xheight*
+
+
+
+-output_trainer trainer +
+
+

+ (Output, Optional) Filename for output trainer. +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/classifier_tester.exe b/lib/Tesseract-OCR/classifier_tester.exe new file mode 100644 index 0000000000000000000000000000000000000000..cbc0f90cb0f9ede6ae8a9545b12e09dea05f8c91 --- /dev/null +++ b/lib/Tesseract-OCR/classifier_tester.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2885d7da65ed487ebb41d2635bef008014655af7cbafd47427c08d855bf7935f +size 4986528 diff --git a/lib/Tesseract-OCR/cntraining.1.html b/lib/Tesseract-OCR/cntraining.1.html new file mode 100644 index 0000000000000000000000000000000000000000..69a13f7fd5ed28903269cb62d8c8a0c1ccfc67c5 --- /dev/null +++ b/lib/Tesseract-OCR/cntraining.1.html @@ -0,0 +1,803 @@ + + + + + +CNTRAINING(1) + + + + + +
+
+

NAME

+
+

cntraining - character normalization training for Tesseract

+
+
+
+

SYNOPSIS

+
+

cntraining [-D dir] FILE

+
+
+
+

DESCRIPTION

+
+

cntraining takes a list of .tr files, from which it generates the +normproto data file (the character normalization sensitivity +prototypes).

+
+
+
+

OPTIONS

+
+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), shapeclustering(1), mftraining(1)

+ +
+
+
+

COPYING

+
+

Copyright (c) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/cntraining.exe b/lib/Tesseract-OCR/cntraining.exe new file mode 100644 index 0000000000000000000000000000000000000000..6b0650b0e2174fa36d9c85f5153e18e46264226d --- /dev/null +++ b/lib/Tesseract-OCR/cntraining.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cf7ea625b72bb0bc1d4c4e0a55b141d8b3d0611effd66899457bf189122488 +size 4709776 diff --git a/lib/Tesseract-OCR/combine_lang_model.1.html b/lib/Tesseract-OCR/combine_lang_model.1.html new file mode 100644 index 0000000000000000000000000000000000000000..0f791366017d67b65278c13bff34d8177da72555 --- /dev/null +++ b/lib/Tesseract-OCR/combine_lang_model.1.html @@ -0,0 +1,888 @@ + + + + + +COMBINE_LANG_MODEL(1) + + + + + +
+
+

SYNOPSIS

+
+

combine_lang_model --input_unicharset filename --script_dir dirname --output_dir rootdir --lang lang [--lang_is_rtl] [--pass_through_recoder] [--words file --puncs file --numbers file]

+
+
+
+

DESCRIPTION

+
+

combine_lang_model(1) generates a starter traineddata file that can be used to train an LSTM-based neural network model. It takes as input a unicharset and an optional set of wordlists. It eliminates the need to run set_unicharset_properties(1), wordlist2dawg(1), some non-existent binary to generate the recoder (unicode compressor), and finally combine_tessdata(1).

+
+
+
+

OPTIONS

+
+
+
+--lang lang +
+
+

+ The language to use. + Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) +

+
+
+--script_dir PATH +
+
+

+ Directory name for input script unicharsets. It should point to the location of langdata (github repo) directory. (type:string default:) +

+
+
+--input_unicharset FILE +
+
+

+ Unicharset to complete and use in encoding. It can be a hand-created file with incomplete fields. Its basic and script properties will be set before it is used. (type:string default:) +

+
+
+--lang_is_rtl BOOL +
+
+

+ True if the language being processed is written right-to-left (e.g. Arabic/Hebrew). (type:bool default:false) +

+
+
+--pass_through_recoder BOOL +
+
+

+ If true, the recoder is a simple pass-through of the unicharset. Otherwise, potentially a compression of it by encoding Hangul in Jamos, decomposing multi-unicode symbols into sequences of unicodes, and encoding Han using the data in the radical_table_data, which must be the content of the file: langdata/radical-stroke.txt. (type:bool default:false) +

+
+
+--version_str STRING +
+
+

+ An arbitrary version label to add to traineddata file (type:string default:) +

+
+
+--words FILE +
+
+

+ (Optional) File listing words to use for the system dictionary (type:string default:) +

+
+
+--numbers FILE +
+
+

+ (Optional) File listing number patterns (type:string default:) +

+
+
+--puncs FILE +
+
+

+ (Optional) File listing punctuation patterns. The words/puncs/numbers lists may be all empty. If any are non-empty then puncs must be non-empty. (type:string default:) +

+
+
+--output_dir PATH +
+
+

+ Root directory for output files. Output files will be written to <output_dir>/<lang>/<lang>.* (type:string default:) +

+
+
+
+
+
+

HISTORY

+
+

combine_lang_model(1) was first made available for Tesseract 4.00.00alpha.

+
+
+
+

RESOURCES

+ +
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/combine_lang_model.exe b/lib/Tesseract-OCR/combine_lang_model.exe new file mode 100644 index 0000000000000000000000000000000000000000..346294259ab17782808d0cde483c8b1980ee629a --- /dev/null +++ b/lib/Tesseract-OCR/combine_lang_model.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7213745662150295908543bfb1fa669cf1edd6f3f01b682e8fb1d595e6b48eaf +size 3503232 diff --git a/lib/Tesseract-OCR/combine_tessdata.1.html b/lib/Tesseract-OCR/combine_tessdata.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e132f86a663f0a99fee15b985793eebf44dd9764 --- /dev/null +++ b/lib/Tesseract-OCR/combine_tessdata.1.html @@ -0,0 +1,1070 @@ + + + + + +COMBINE_TESSDATA(1) + + + + + +
+
+

NAME

+
+

combine_tessdata - combine/extract/overwrite/list/compact Tesseract data

+
+
+
+

SYNOPSIS

+
+

combine_tessdata [OPTION] FILE

+
+
+
+

DESCRIPTION

+
+

combine_tessdata(1) is the main program to combine/extract/overwrite/list/compact +tessdata components in [lang].traineddata files.

+

To combine all the individual tessdata components (unicharset, DAWGs, +classifier templates, ambiguities, language configs) located at, say, +/home/$USER/temp/eng.* run:

+
+
+
combine_tessdata /home/$USER/temp/eng.
+
+

The result will be a combined tessdata file /home/$USER/temp/eng.traineddata

+

Specify option -e if you would like to extract individual components +from a combined traineddata file. For example, to extract language config +file and the unicharset from tessdata/eng.traineddata run:

+
+
+
combine_tessdata -e tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
+
+

The desired config file and unicharset will be written to +/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset

+

Specify option -o to overwrite individual components of the given +[lang].traineddata file. For example, to overwrite language config +and unichar ambiguities files in tessdata/eng.traineddata use:

+
+
+
combine_tessdata -o tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
+
+

As a result, tessdata/eng.traineddata will contain the new language config +and unichar ambigs, plus all the original DAWGs, classifier templates, etc.

+

Note: the file names of the files to extract to and to overwrite from should +have the appropriate file suffixes (extensions) indicating their tessdata +component type (.unicharset for the unicharset, .unicharambigs for unichar +ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.

+

Specify option -u to unpack all the components to the specified path:

+
+
+
combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
+
+

This will create /home/$USER/temp/eng.* files with individual tessdata +components from tessdata/eng.traineddata.

+
+
+
+

OPTIONS

+
+

-c .traineddata FILE…: + Compacts the LSTM component in the .traineddata file to int.

+

-d .traineddata FILE…: + Lists directory of components from the .traineddata file.

+

-e .traineddata FILE…: + Extracts the specified components from the .traineddata file

+

-l .traineddata FILE…: + List the network information.

+

-o .traineddata FILE…: + Overwrites the specified components of the .traineddata file + with those provided on the command line.

+

-u .traineddata PATHPREFIX + Unpacks the .traineddata using the provided prefix.

+
+
+
+

CAVEATS

+
+

Prefix refers to the full file prefix, including period (.)

+
+
+
+

COMPONENTS

+
+

The components in a Tesseract lang.traineddata file as of +Tesseract 4.0 are briefly described below. For more information on +many of these files, see +https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html +and +https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html

+
+
+lang.config +
+
+

+ (Optional) Language-specific overrides to default config variables. + For 4.0 traineddata files, lang.config provides control parameters which + can affect layout analysis, and sub-languages. +

+
+
+lang.unicharset +
+
+

+ (Required - 3.0x legacy tesseract) The list of symbols that Tesseract recognizes, with properties. + See unicharset(5). +

+
+
+lang.unicharambigs +
+
+

+ (Optional - 3.0x legacy tesseract) This file contains information on pairs of recognized symbols + which are often confused. For example, rn and m. +

+
+
+lang.inttemp +
+
+

+ (Required - 3.0x legacy tesseract) Character shape templates for each unichar. Produced by + mftraining(1). +

+
+
+lang.pffmtable +
+
+

+ (Required - 3.0x legacy tesseract) The number of features expected for each unichar. + Produced by mftraining(1) from .tr files. +

+
+
+lang.normproto +
+
+

+ (Required - 3.0x legacy tesseract) Character normalization prototypes generated by cntraining(1) + from .tr files. +

+
+
+lang.punc-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) A dawg made from punctuation patterns found around words. + The "word" part is replaced by a single space. +

+
+
+lang.word-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) A dawg made from dictionary words from the language. +

+
+
+lang.number-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) A dawg made from tokens which originally contained digits. + Each digit is replaced by a space character. +

+
+
+lang.freq-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) A dawg made from the most frequent words which would have + gone into word-dawg. +

+
+
+lang.fixed-length-dawgs +
+
+

+ (Optional - 3.0x legacy tesseract) Several dawgs of different fixed lengths — useful for + languages like Chinese. +

+
+
+lang.shapetable +
+
+

+ (Optional - 3.0x legacy tesseract) When present, a shapetable is an extra layer between the character + classifier and the word recognizer that allows the character classifier to + return a collection of unichar ids and fonts instead of a single unichar-id + and font. +

+
+
+lang.bigram-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) A dawg of word bigrams where the words are separated by a space + and each digit is replaced by a ?. +

+
+
+lang.unambig-dawg +
+
+

+ (Optional - 3.0x legacy tesseract) . +

+
+
+lang.params-model +
+
+

+ (Optional - 3.0x legacy tesseract) . +

+
+
+lang.lstm +
+
+

+ (Required - 4.0 LSTM) Neural net trained recognition model generated by lstmtraining. +

+
+
+lang.lstm-punc-dawg +
+
+

+ (Optional - 4.0 LSTM) A dawg made from punctuation patterns found around words. + The "word" part is replaced by a single space. Uses lang.lstm-unicharset. +

+
+
+lang.lstm-word-dawg +
+
+

+ (Optional - 4.0 LSTM) A dawg made from dictionary words from the language. + Uses lang.lstm-unicharset. +

+
+
+lang.lstm-number-dawg +
+
+

+ (Optional - 4.0 LSTM) A dawg made from tokens which originally contained digits. + Each digit is replaced by a space character. Uses lang.lstm-unicharset. +

+
+
+lang.lstm-unicharset +
+
+

+ (Required - 4.0 LSTM) The unicode character set that Tesseract recognizes, with properties. + Same unicharset must be used to train the LSTM and build the lstm-*-dawgs files. +

+
+
+lang.lstm-recoder +
+
+

+ (Required - 4.0 LSTM) Unicharcompress, aka the recoder, which maps the unicharset + further to the codes actually used by the neural network recognizer. This is created as + part of the starter traineddata by combine_lang_model. +

+
+
+lang.version +
+
+

+ (Optional) Version string for the traineddata file. + First appeared in version 4.0 of Tesseract. + Older versions of traineddata files will report Version:Pre-4.0.0. + Version 4.0 traineddata files may include the network spec + used for LSTM training as part of the version string. +

+
+
+
+
+
+

HISTORY

+
+

combine_tessdata(1) first appeared in version 3.00 of Tesseract

+
+
+
+

SEE ALSO

+
+

tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), +unicharambigs(5)

+
+
+
+

COPYING

+
+

Copyright (C) 2009, Google Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/combine_tessdata.exe b/lib/Tesseract-OCR/combine_tessdata.exe new file mode 100644 index 0000000000000000000000000000000000000000..f3cc4fda74aab3a10de46a65e9070825ed8d11c5 --- /dev/null +++ b/lib/Tesseract-OCR/combine_tessdata.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4d422d2fab6f384fbf2096d3910bb6ef1d50a8df095d20cb9393c228300103 +size 1280584 diff --git a/lib/Tesseract-OCR/dawg2wordlist.1.html b/lib/Tesseract-OCR/dawg2wordlist.1.html new file mode 100644 index 0000000000000000000000000000000000000000..bac0b5e809f2b5313ea1f5a40f54749180103b87 --- /dev/null +++ b/lib/Tesseract-OCR/dawg2wordlist.1.html @@ -0,0 +1,802 @@ + + + + + +DAWG2WORDLIST(1) + + + + + +
+
+

SYNOPSIS

+
+

dawg2wordlist UNICHARSET DAWG WORDLIST

+
+
+
+

DESCRIPTION

+
+

dawg2wordlist(1) converts a Tesseract Directed Acyclic Word +Graph (DAWG) to a list of words using a unicharset as key.

+
+
+
+

OPTIONS

+
+

UNICHARSET + The unicharset of the language. This is the unicharset + generated by mftraining(1).

+

DAWG + The input DAWG, created by wordlist2dawg(1)

+

WORDLIST + Plain text (output) file in UTF-8, one word per line

+
+
+
+

SEE ALSO

+
+

tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), +combine_tessdata(1)

+ +
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/dawg2wordlist.exe b/lib/Tesseract-OCR/dawg2wordlist.exe new file mode 100644 index 0000000000000000000000000000000000000000..39981f767b48223635adb5c68c52d2e11eb984f5 Binary files /dev/null and b/lib/Tesseract-OCR/dawg2wordlist.exe differ diff --git a/lib/Tesseract-OCR/doc/AUTHORS b/lib/Tesseract-OCR/doc/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..9d1e73c3083260ae9485e7d07f6ac1fa271737fb --- /dev/null +++ b/lib/Tesseract-OCR/doc/AUTHORS @@ -0,0 +1,51 @@ +Ray Smith (lead developer) +Ahmad Abdulkader +Rika Antonova +Nicholas Beato +Jeff Breidenbach +Samuel Charron +Phil Cheatle +Simon Crouch +David Eger +Sheelagh Huddleston +Dan Johnson +Rajesh Katikam +Thomas Kielbus +Dar-Shyang Lee +Zongyi (Joe) Liu +Robert Moss +Chris Newton +Michael Reimer +Marius Renn +Raquel Romano +Christy Russon +Shobhit Saxena +Mark Seaman +Faisal Shafait +Hiroshi Takenaka +Ranjith Unnikrishnan +Joern Wanke +Ping Ping Xiu +Andrew Ziem +Oscar Zuniga + +Community Contributors: +Zdenko Podobný (Maintainer) +Jim Regan (Maintainer) +James R Barlow +Stefan Brechtken +Thomas Breuel +Amit Dovev +Martin Ettl +Shree Devi Kumar +Noah Metzger +Tom Morris +Tobias Müller +Egor Pugin +Robert Sachunsky +Raf Schietekat +Sundar M. Vaidya +Robin Watts +Stefan Weil +Nick White +Alexander Zaitsev diff --git a/lib/Tesseract-OCR/doc/LICENSE b/lib/Tesseract-OCR/doc/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d645695673349e3947e8e5ae42332d0ac3164cd7 --- /dev/null +++ b/lib/Tesseract-OCR/doc/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/lib/Tesseract-OCR/doc/README.md b/lib/Tesseract-OCR/doc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cbd9af46ccf112e3f40509322022fc8427d1a562 --- /dev/null +++ b/lib/Tesseract-OCR/doc/README.md @@ -0,0 +1,134 @@ +# Tesseract OCR + +[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/) +[![Build status](https://github.com/tesseract-ocr/tesseract/workflows/sw/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\ +[![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr) +[![CodeQL](https://github.com/tesseract-ocr/tesseract/workflows/CodeQL/badge.svg)](https://github.com/tesseract-ocr/tesseract/security/code-scanning) +[![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr) +\ +[![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/main/LICENSE) +[![Downloads](https://img.shields.io/badge/download-all%20releases-brightgreen.svg)](https://github.com/tesseract-ocr/tesseract/releases/) + +## Table of Contents + +* [Tesseract OCR](#tesseract-ocr) + * [About](#about) + * [Brief history](#brief-history) + * [Installing Tesseract](#installing-tesseract) + * [Running Tesseract](#running-tesseract) + * [For developers](#for-developers) + * [Support](#support) + * [License](#license) + * [Dependencies](#dependencies) + * [Latest Version of README](#latest-version-of-readme) + +## About + +This package contains an **OCR engine** - `libtesseract` and a **command line program** - `tesseract`. 
+ +Tesseract 4 adds a new neural net (LSTM) based [OCR engine](https://en.wikipedia.org/wiki/Optical_character_recognition) which is focused on line recognition, but also still supports the legacy Tesseract OCR engine of Tesseract 3 which works by recognizing character patterns. Compatibility with Tesseract 3 is enabled by using the Legacy OCR Engine mode (--oem 0). +It also needs [traineddata](https://tesseract-ocr.github.io/tessdoc/Data-Files.html) files which support the legacy engine, for example those from the [tessdata](https://github.com/tesseract-ocr/tessdata) repository. + +Stefan Weil is the current lead developer. Ray Smith was the lead developer until 2018. The maintainer is Zdenko Podobny. For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/main/AUTHORS) +and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors). + +Tesseract has **unicode (UTF-8) support**, and can **recognize [more than 100 languages](https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html)** "out of the box". + +Tesseract supports **[various image formats](https://tesseract-ocr.github.io/tessdoc/InputFormats)** including PNG, JPEG and TIFF. + +Tesseract supports **various output formats**: plain text, hOCR (HTML), PDF, invisible-text-only PDF, TSV and ALTO (the last one - since version 4.1.0). + +You should note that in many cases, in order to get better OCR results, you'll need to **[improve the quality](https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html) of the image** you are giving Tesseract. + +This project **does not include a GUI application**. If you need one, please see the [3rdParty](https://tesseract-ocr.github.io/tessdoc/User-Projects-%E2%80%93-3rdParty.html) documentation. + +Tesseract **can be trained to recognize other languages**. +See [Tesseract Training](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html) for more information. 
+ +## Brief history + +Tesseract was originally developed at Hewlett-Packard Laboratories Bristol UK and at Hewlett-Packard Co, Greeley Colorado USA between 1985 and 1994, with some more changes made in 1996 to port to Windows, and some C++izing in 1998. In 2005 Tesseract was open sourced by HP. From 2006 until November 2018 it was developed by Google. + +Major version 5 is the current stable version and started with release +[5.0.0](https://github.com/tesseract-ocr/tesseract/releases/tag/5.0.0) on November 30, 2021. Newer minor versions and bugfix versions are available from +[GitHub](https://github.com/tesseract-ocr/tesseract/releases/). + +Latest source code is available from [main branch on GitHub](https://github.com/tesseract-ocr/tesseract/tree/main). +Open issues can be found in [issue tracker](https://github.com/tesseract-ocr/tesseract/issues), +and [planning documentation](https://tesseract-ocr.github.io/tessdoc/Planning.html). + +See **[Release Notes](https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html)** +and **[Change Log](https://github.com/tesseract-ocr/tesseract/blob/main/ChangeLog)** for more details of the releases. + +## Installing Tesseract + +You can either [Install Tesseract via pre-built binary package](https://tesseract-ocr.github.io/tessdoc/Installation.html) +or [build it from source](https://tesseract-ocr.github.io/tessdoc/Compiling.html). + +A C++ compiler with good C++17 support is required for building Tesseract from source. + +## Running Tesseract + +Basic **[command line usage](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html)**: + + tesseract imagename outputbase [-l lang] [--oem ocrenginemode] [--psm pagesegmode] [configfiles...] + +For more information about the various command line options use `tesseract --help` or `man tesseract`. + +Examples can be found in the [documentation](https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html#simplest-invocation-to-ocr-an-image). 
+ +## For developers + +Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/capi.h) or +[C++](https://github.com/tesseract-ocr/tesseract/blob/main/include/tesseract/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the +[wrapper](https://tesseract-ocr.github.io/tessdoc/AddOns.html#tesseract-wrappers) section in the AddOns documentation. + +Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](https://tesseract-ocr.github.io/). + +## Support + +Before you submit an issue, please review **[the guidelines for this repository](https://github.com/tesseract-ocr/tesseract/blob/main/CONTRIBUTING.md)**. + +For support, first read the [documentation](https://tesseract-ocr.github.io/tessdoc/), +particularly the [FAQ](https://tesseract-ocr.github.io/tessdoc/FAQ.html) to see if your problem is addressed there. +If not, search the [Tesseract user forum](https://groups.google.com/g/tesseract-ocr), the [Tesseract developer forum](https://groups.google.com/g/tesseract-dev) and [past issues](https://github.com/tesseract-ocr/tesseract/issues), and if you still can't find what you need, ask for support in the mailing-lists. + +Mailing-lists: + +* [tesseract-ocr](https://groups.google.com/g/tesseract-ocr) - For tesseract users. +* [tesseract-dev](https://groups.google.com/g/tesseract-dev) - For tesseract developers. + +Please report an issue only for a **bug**, not for asking questions. + +## License + + The code in this repository is licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +**NOTE**: This software depends on other packages that may be licensed under different open source licenses. + +Tesseract uses [Leptonica library](http://leptonica.com/) which essentially +uses a [BSD 2-clause license](http://leptonica.com/about-the-license.html). + +## Dependencies + +Tesseract uses [Leptonica library](https://github.com/DanBloomberg/leptonica) +for opening input images (e.g. not documents like pdf). +It is suggested to use leptonica with built-in support for [zlib](https://zlib.net), +[png](https://sourceforge.net/projects/libpng) and +[tiff](http://www.simplesystems.org/libtiff) (for multipage tiff). 
+ +## Latest Version of README + +For the latest online version of the README.md see: + + diff --git a/lib/Tesseract-OCR/libLerc.dll b/lib/Tesseract-OCR/libLerc.dll new file mode 100644 index 0000000000000000000000000000000000000000..1c089e20ba3aa0a64a9af9fc49db564eab85a52d Binary files /dev/null and b/lib/Tesseract-OCR/libLerc.dll differ diff --git a/lib/Tesseract-OCR/libarchive-13.dll b/lib/Tesseract-OCR/libarchive-13.dll new file mode 100644 index 0000000000000000000000000000000000000000..5107900ea6252042accb0b797cd15cd09ce3c1b1 --- /dev/null +++ b/lib/Tesseract-OCR/libarchive-13.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0d6b2218d906e68d86d22e88a2c27c839af1e16cbc04790ecd7a938ca571c3 +size 1391057 diff --git a/lib/Tesseract-OCR/libb2-1.dll b/lib/Tesseract-OCR/libb2-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..1c1166a758d115949698f7ca4f48fb1287d37f4f Binary files /dev/null and b/lib/Tesseract-OCR/libb2-1.dll differ diff --git a/lib/Tesseract-OCR/libbrotlicommon.dll b/lib/Tesseract-OCR/libbrotlicommon.dll new file mode 100644 index 0000000000000000000000000000000000000000..95b1b4e6ecafea3654785c9369d4aafafedbaf20 Binary files /dev/null and b/lib/Tesseract-OCR/libbrotlicommon.dll differ diff --git a/lib/Tesseract-OCR/libbrotlidec.dll b/lib/Tesseract-OCR/libbrotlidec.dll new file mode 100644 index 0000000000000000000000000000000000000000..1b86f76807e87efe40b4b83bd4d89b3624bc8c0b Binary files /dev/null and b/lib/Tesseract-OCR/libbrotlidec.dll differ diff --git a/lib/Tesseract-OCR/libbz2-1.dll b/lib/Tesseract-OCR/libbz2-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..83d25c485e19121a86f90f294f89dd5387b1786b Binary files /dev/null and b/lib/Tesseract-OCR/libbz2-1.dll differ diff --git a/lib/Tesseract-OCR/libcairo-2.dll b/lib/Tesseract-OCR/libcairo-2.dll new file mode 100644 index 0000000000000000000000000000000000000000..b98dbdd1bcee6b4d3cfc5f72d121b38675308a03 --- /dev/null +++ 
b/lib/Tesseract-OCR/libcairo-2.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7982d8b78327af963982d45afd7f348a6c3249b8ff287b7d5f1e0b023c1814ec +size 1241852 diff --git a/lib/Tesseract-OCR/libcrypto-3-x64.dll b/lib/Tesseract-OCR/libcrypto-3-x64.dll new file mode 100644 index 0000000000000000000000000000000000000000..81a2f0ad692917b97b9f18c92dcb9163f825832b --- /dev/null +++ b/lib/Tesseract-OCR/libcrypto-3-x64.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078981a11aebe9cc5a4820798a7dc5de4a2f6fd03a47f7b5899a276d9f59f7eb +size 4902799 diff --git a/lib/Tesseract-OCR/libcurl-4.dll b/lib/Tesseract-OCR/libcurl-4.dll new file mode 100644 index 0000000000000000000000000000000000000000..6be3b1315b30e8c7ce71b67732beddc6a35fddbe Binary files /dev/null and b/lib/Tesseract-OCR/libcurl-4.dll differ diff --git a/lib/Tesseract-OCR/libdatrie-1.dll b/lib/Tesseract-OCR/libdatrie-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..05c0930c46cc9355125f5144dba0c8204b55489c Binary files /dev/null and b/lib/Tesseract-OCR/libdatrie-1.dll differ diff --git a/lib/Tesseract-OCR/libdeflate.dll b/lib/Tesseract-OCR/libdeflate.dll new file mode 100644 index 0000000000000000000000000000000000000000..b4ea67c6ab092968d91fe79e686be8071f786142 Binary files /dev/null and b/lib/Tesseract-OCR/libdeflate.dll differ diff --git a/lib/Tesseract-OCR/libexpat-1.dll b/lib/Tesseract-OCR/libexpat-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..f3ea25d8ede42f8d856ddbce4ed2f4a271d37a38 Binary files /dev/null and b/lib/Tesseract-OCR/libexpat-1.dll differ diff --git a/lib/Tesseract-OCR/libffi-8.dll b/lib/Tesseract-OCR/libffi-8.dll new file mode 100644 index 0000000000000000000000000000000000000000..ab5b22547dcacc8f1f938ab7201c6b12e8c8090f Binary files /dev/null and b/lib/Tesseract-OCR/libffi-8.dll differ diff --git a/lib/Tesseract-OCR/libfontconfig-1.dll b/lib/Tesseract-OCR/libfontconfig-1.dll new file mode 
100644 index 0000000000000000000000000000000000000000..fcd9d04cd2d5643be48bcc8415f0f53318740500 Binary files /dev/null and b/lib/Tesseract-OCR/libfontconfig-1.dll differ diff --git a/lib/Tesseract-OCR/libfreetype-6.dll b/lib/Tesseract-OCR/libfreetype-6.dll new file mode 100644 index 0000000000000000000000000000000000000000..c34a6cba3c68a2ac90a2c4360f2491ac03678ffb Binary files /dev/null and b/lib/Tesseract-OCR/libfreetype-6.dll differ diff --git a/lib/Tesseract-OCR/libfribidi-0.dll b/lib/Tesseract-OCR/libfribidi-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..c3ab0fd7137aec0deb25a88cd94c964c4c22ec90 Binary files /dev/null and b/lib/Tesseract-OCR/libfribidi-0.dll differ diff --git a/lib/Tesseract-OCR/libgcc_s_seh-1.dll b/lib/Tesseract-OCR/libgcc_s_seh-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..0fb510d8b747ef6fed56f8d593214cff50996a6d Binary files /dev/null and b/lib/Tesseract-OCR/libgcc_s_seh-1.dll differ diff --git a/lib/Tesseract-OCR/libgif-7.dll b/lib/Tesseract-OCR/libgif-7.dll new file mode 100644 index 0000000000000000000000000000000000000000..6831ad0a2c215a1823aa74521bf86b2b490eecda Binary files /dev/null and b/lib/Tesseract-OCR/libgif-7.dll differ diff --git a/lib/Tesseract-OCR/libgio-2.0-0.dll b/lib/Tesseract-OCR/libgio-2.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..568e301a938602b28308ee5dae82f55a9ae3cbd8 --- /dev/null +++ b/lib/Tesseract-OCR/libgio-2.0-0.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20565bdeb9fad00ba821bdba10379c3579458ba0e916ed627b6a94adfc498dce +size 1798596 diff --git a/lib/Tesseract-OCR/libglib-2.0-0.dll b/lib/Tesseract-OCR/libglib-2.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..6e9c55e97982b6fd796b835199a714bed61366bc --- /dev/null +++ b/lib/Tesseract-OCR/libglib-2.0-0.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:87c1df8482d45b50fb72fd87b213475e8385dc7b0b9a8df552de706b050612d2 +size 1433294 diff --git a/lib/Tesseract-OCR/libgmodule-2.0-0.dll b/lib/Tesseract-OCR/libgmodule-2.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..c3359d95f63e54530b7bed388956a0973422dbb6 Binary files /dev/null and b/lib/Tesseract-OCR/libgmodule-2.0-0.dll differ diff --git a/lib/Tesseract-OCR/libgobject-2.0-0.dll b/lib/Tesseract-OCR/libgobject-2.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..c6066d81eac28945ce50fca1634c010fb7d3c505 Binary files /dev/null and b/lib/Tesseract-OCR/libgobject-2.0-0.dll differ diff --git a/lib/Tesseract-OCR/libgraphite2.dll b/lib/Tesseract-OCR/libgraphite2.dll new file mode 100644 index 0000000000000000000000000000000000000000..70cf27ef9be11aeed2e5681d2659f01a65322b17 Binary files /dev/null and b/lib/Tesseract-OCR/libgraphite2.dll differ diff --git a/lib/Tesseract-OCR/libharfbuzz-0.dll b/lib/Tesseract-OCR/libharfbuzz-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..f71fb8ab2c2412cb9ef5f81934cb6db7aaa419d1 --- /dev/null +++ b/lib/Tesseract-OCR/libharfbuzz-0.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8e04ea363b1a7fa43203b9e9a7ba58cb0d87026ba35bb238d0873d997c0cff +size 1199114 diff --git a/lib/Tesseract-OCR/libiconv-2.dll b/lib/Tesseract-OCR/libiconv-2.dll new file mode 100644 index 0000000000000000000000000000000000000000..65460fc9625621432874c2d672f6b4576995a0ba --- /dev/null +++ b/lib/Tesseract-OCR/libiconv-2.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55de11531dc0e566cb91f26e48d1301a161a4b8b24abed42304d711412368760 +size 1117178 diff --git a/lib/Tesseract-OCR/libicudt72.dll b/lib/Tesseract-OCR/libicudt72.dll new file mode 100644 index 0000000000000000000000000000000000000000..b35edc2d84a2fae88c9d65eedbb495a6435b022d --- /dev/null +++ b/lib/Tesseract-OCR/libicudt72.dll @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8dcf3bc162027ddd04b34021968c6b6b30d8b89a4fb3acdcf59671f99b6a54dd +size 31265798 diff --git a/lib/Tesseract-OCR/libicuin72.dll b/lib/Tesseract-OCR/libicuin72.dll new file mode 100644 index 0000000000000000000000000000000000000000..a18e68264cc430e72ad9072dda5bd4cbb4e70776 --- /dev/null +++ b/lib/Tesseract-OCR/libicuin72.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de243cb07b95ea0b0484289eca3a2a763393369b9cd8366c35c084fb9d45e092 +size 2916371 diff --git a/lib/Tesseract-OCR/libicuuc72.dll b/lib/Tesseract-OCR/libicuuc72.dll new file mode 100644 index 0000000000000000000000000000000000000000..4253001884aece171e4915823536ed081ce21956 --- /dev/null +++ b/lib/Tesseract-OCR/libicuuc72.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c752f30ce2a9c090fc03768b9589cdb58c98217407706683c61ce660b8d9c3c +size 1804704 diff --git a/lib/Tesseract-OCR/libidn2-0.dll b/lib/Tesseract-OCR/libidn2-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..f9b07cba9e59e75f73ea684830fae6ca8e7cb37f Binary files /dev/null and b/lib/Tesseract-OCR/libidn2-0.dll differ diff --git a/lib/Tesseract-OCR/libintl-8.dll b/lib/Tesseract-OCR/libintl-8.dll new file mode 100644 index 0000000000000000000000000000000000000000..3a4119c47d31331b0d2329cacfd79fbb6c64918a Binary files /dev/null and b/lib/Tesseract-OCR/libintl-8.dll differ diff --git a/lib/Tesseract-OCR/libjbig-0.dll b/lib/Tesseract-OCR/libjbig-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..33ff84261bd89d98321b1fda17c1521293b54bd1 Binary files /dev/null and b/lib/Tesseract-OCR/libjbig-0.dll differ diff --git a/lib/Tesseract-OCR/libjpeg-8.dll b/lib/Tesseract-OCR/libjpeg-8.dll new file mode 100644 index 0000000000000000000000000000000000000000..d372bcc2e6e332bc416c629844245e421abbe27e Binary files /dev/null and b/lib/Tesseract-OCR/libjpeg-8.dll differ diff --git a/lib/Tesseract-OCR/libleptonica-6.dll 
b/lib/Tesseract-OCR/libleptonica-6.dll new file mode 100644 index 0000000000000000000000000000000000000000..53f790a8670df0097c0bbc878161f013600f4800 --- /dev/null +++ b/lib/Tesseract-OCR/libleptonica-6.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e00d7858e1f8a07ad098a951ef6569268a2234a0b76033ce3ef201e81b0645 +size 2698960 diff --git a/lib/Tesseract-OCR/liblz4.dll b/lib/Tesseract-OCR/liblz4.dll new file mode 100644 index 0000000000000000000000000000000000000000..df85958c15533b23ab75ce58bd42c89ff26ce0e2 Binary files /dev/null and b/lib/Tesseract-OCR/liblz4.dll differ diff --git a/lib/Tesseract-OCR/liblzma-5.dll b/lib/Tesseract-OCR/liblzma-5.dll new file mode 100644 index 0000000000000000000000000000000000000000..172426e689cd7d7b3a91841b09bd37d22750ed66 Binary files /dev/null and b/lib/Tesseract-OCR/liblzma-5.dll differ diff --git a/lib/Tesseract-OCR/libopenjp2-7.dll b/lib/Tesseract-OCR/libopenjp2-7.dll new file mode 100644 index 0000000000000000000000000000000000000000..c34b1ebfc22d2a8278e981d236ac8ab82c2e727e Binary files /dev/null and b/lib/Tesseract-OCR/libopenjp2-7.dll differ diff --git a/lib/Tesseract-OCR/libpango-1.0-0.dll b/lib/Tesseract-OCR/libpango-1.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..dceef21383140713eacd0a7e332ac64bca5a1772 Binary files /dev/null and b/lib/Tesseract-OCR/libpango-1.0-0.dll differ diff --git a/lib/Tesseract-OCR/libpangocairo-1.0-0.dll b/lib/Tesseract-OCR/libpangocairo-1.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..14fee09c2a73f6b2ee41471508b5c720b43f44a9 Binary files /dev/null and b/lib/Tesseract-OCR/libpangocairo-1.0-0.dll differ diff --git a/lib/Tesseract-OCR/libpangoft2-1.0-0.dll b/lib/Tesseract-OCR/libpangoft2-1.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..cad1613460027f79fb0d53196e55298bd41e4ba0 Binary files /dev/null and b/lib/Tesseract-OCR/libpangoft2-1.0-0.dll differ diff --git 
a/lib/Tesseract-OCR/libpangowin32-1.0-0.dll b/lib/Tesseract-OCR/libpangowin32-1.0-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..6b1e618886689346667f61c9534c2c32181aafbc Binary files /dev/null and b/lib/Tesseract-OCR/libpangowin32-1.0-0.dll differ diff --git a/lib/Tesseract-OCR/libpcre2-8-0.dll b/lib/Tesseract-OCR/libpcre2-8-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..4f87a28e21da495e779c15af7d272401fa91782f Binary files /dev/null and b/lib/Tesseract-OCR/libpcre2-8-0.dll differ diff --git a/lib/Tesseract-OCR/libpixman-1-0.dll b/lib/Tesseract-OCR/libpixman-1-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..491bde2fe8f7f2cb3ac807ea16a40100949e31eb Binary files /dev/null and b/lib/Tesseract-OCR/libpixman-1-0.dll differ diff --git a/lib/Tesseract-OCR/libpng16-16.dll b/lib/Tesseract-OCR/libpng16-16.dll new file mode 100644 index 0000000000000000000000000000000000000000..6b79147871a4a033ca6db5b9b69e922794c79e7a Binary files /dev/null and b/lib/Tesseract-OCR/libpng16-16.dll differ diff --git a/lib/Tesseract-OCR/libpsl-5.dll b/lib/Tesseract-OCR/libpsl-5.dll new file mode 100644 index 0000000000000000000000000000000000000000..d0e0242459271cf8701fb288e9365a0a535e70f8 Binary files /dev/null and b/lib/Tesseract-OCR/libpsl-5.dll differ diff --git a/lib/Tesseract-OCR/libsharpyuv-0.dll b/lib/Tesseract-OCR/libsharpyuv-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..e127306bc1cbd9d66a5feab7a700343bde4622cc Binary files /dev/null and b/lib/Tesseract-OCR/libsharpyuv-0.dll differ diff --git a/lib/Tesseract-OCR/libssh2-1.dll b/lib/Tesseract-OCR/libssh2-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..248a47868a43c646f4fed0df4a4440609c930901 Binary files /dev/null and b/lib/Tesseract-OCR/libssh2-1.dll differ diff --git a/lib/Tesseract-OCR/libstdc++-6.dll b/lib/Tesseract-OCR/libstdc++-6.dll new file mode 100644 index 
0000000000000000000000000000000000000000..e29f3f9d5b4ab29e428e14ffbd9d973af68e2375 --- /dev/null +++ b/lib/Tesseract-OCR/libstdc++-6.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c27574f5fadd4cb736612d26c237e66d3efc604c7f4354cbd5130ee61a8890 +size 2025890 diff --git a/lib/Tesseract-OCR/libtesseract-5.dll b/lib/Tesseract-OCR/libtesseract-5.dll new file mode 100644 index 0000000000000000000000000000000000000000..7bc0aa1327ab6e158a425e855c32ca0a68a10a39 --- /dev/null +++ b/lib/Tesseract-OCR/libtesseract-5.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f927a2cc81672b3f0c9f9ea1b890c8cac38639653369d4294e20df79ea5839 +size 99080152 diff --git a/lib/Tesseract-OCR/libthai-0.dll b/lib/Tesseract-OCR/libthai-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..37370766fa0e8f59bb438e1f97f3234b86f65caa Binary files /dev/null and b/lib/Tesseract-OCR/libthai-0.dll differ diff --git a/lib/Tesseract-OCR/libtiff-6.dll b/lib/Tesseract-OCR/libtiff-6.dll new file mode 100644 index 0000000000000000000000000000000000000000..addf703c98b74c560195d754f66e260f43892fd8 Binary files /dev/null and b/lib/Tesseract-OCR/libtiff-6.dll differ diff --git a/lib/Tesseract-OCR/libunistring-5.dll b/lib/Tesseract-OCR/libunistring-5.dll new file mode 100644 index 0000000000000000000000000000000000000000..eb931731b3f7b744fb193a4666193e4082419c06 --- /dev/null +++ b/lib/Tesseract-OCR/libunistring-5.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd08541db31221a2e26238cd6a2bcd6a9ebb9a4c4205c09b1b2050fde409f284 +size 1989559 diff --git a/lib/Tesseract-OCR/libwebp-7.dll b/lib/Tesseract-OCR/libwebp-7.dll new file mode 100644 index 0000000000000000000000000000000000000000..87cae80f50c166a95a60fef8665ba37bfeb6b3b3 Binary files /dev/null and b/lib/Tesseract-OCR/libwebp-7.dll differ diff --git a/lib/Tesseract-OCR/libwebpmux-3.dll b/lib/Tesseract-OCR/libwebpmux-3.dll new file mode 100644 index 
0000000000000000000000000000000000000000..7ddd1a94ff06d9acd9a07a36a248f979222cd893 Binary files /dev/null and b/lib/Tesseract-OCR/libwebpmux-3.dll differ diff --git a/lib/Tesseract-OCR/libwinpthread-1.dll b/lib/Tesseract-OCR/libwinpthread-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..e7e77e8989b695b20acbed93012ed80a2fce3177 Binary files /dev/null and b/lib/Tesseract-OCR/libwinpthread-1.dll differ diff --git a/lib/Tesseract-OCR/libzstd.dll b/lib/Tesseract-OCR/libzstd.dll new file mode 100644 index 0000000000000000000000000000000000000000..86decddb6f2ed70ff64ec2f5d025ea801264e13c --- /dev/null +++ b/lib/Tesseract-OCR/libzstd.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6e9f7124b225c9e4608d623c00ba77f0df34b04b66c0d166f877d9dc4ceac8 +size 1136840 diff --git a/lib/Tesseract-OCR/lstmeval.1.html b/lib/Tesseract-OCR/lstmeval.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b982bd5b0257d281a51bc4c7085a2e90870b2cf5 --- /dev/null +++ b/lib/Tesseract-OCR/lstmeval.1.html @@ -0,0 +1,847 @@ + + + + + +LSTMEVAL(1) + + + + + +
+
+

SYNOPSIS

+
+

lstmeval --model lang.lstm|modelname_checkpoint|modelname_N.NN_NN_NN.checkpoint [--traineddata lang/lang.traineddata] --eval_listfile lang.eval_files.txt [--verbosity N] [--max_image_MB NNNN]

+
+
+
+

DESCRIPTION

+
+

lstmeval(1) evaluates LSTM-based networks. Either a recognition model or a training checkpoint can be given as input for evaluation along with a list of lstmf files. If evaluating a training checkpoint, --traineddata should also be specified. Intermediate training checkpoints can also be used.

+
+
+
+

OPTIONS

+
+
+
+--model FILE +
+
+

+ Name of model file (training or recognition) (type:string default:) +

+
+
+--traineddata FILE +
+
+

+ If model is a training checkpoint, then traineddata must be the traineddata file that was given to the trainer (type:string default:) +

+
+
+--eval_listfile FILE +
+
+

+ File listing sample files in lstmf training format. (type:string default:) +

+
+
+--max_image_MB INT +
+
+

+ Max memory to use for images. (type:int default:2000) +

+
+
+--verbosity INT +
+
+

+ Amount of diagnostic information to output (0-2). (type:int default:1) +

+
+
+
+
+
+

HISTORY

+
+

lstmeval(1) was first made available for tesseract4.00.00alpha.

+
+
+
+

RESOURCES

+ +
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/lstmeval.exe b/lib/Tesseract-OCR/lstmeval.exe new file mode 100644 index 0000000000000000000000000000000000000000..fd7b811da73745eb5043a8bf22ccd5f822138235 --- /dev/null +++ b/lib/Tesseract-OCR/lstmeval.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421360d29a656c86b1ff51070eb93223747018a75cfd33287ef061af45c44e7e +size 8806824 diff --git a/lib/Tesseract-OCR/lstmtraining.1.html b/lib/Tesseract-OCR/lstmtraining.1.html new file mode 100644 index 0000000000000000000000000000000000000000..1da64ba678b493320283ed087e489a6176501950 --- /dev/null +++ b/lib/Tesseract-OCR/lstmtraining.1.html @@ -0,0 +1,999 @@ + + + + + +LSTMTRAINING(1) + + + + + +
+
+

SYNOPSIS

+
+

lstmtraining + --continue_from train_output_dir/continue_from_lang.lstm + --old_traineddata bestdata_dir/continue_from_lang.traineddata + --traineddata train_output_dir/lang/lang.traineddata + --max_iterations NNN + --debug_interval 0|-1 + --train_listfile train_output_dir/lang.training_files.txt + --model_output train_output_dir/newlstmmodel

+
+
+
+

DESCRIPTION

+
+

lstmtraining(1) trains LSTM-based networks using a list of lstmf files and a starter traineddata file as the main input. Training from scratch is not recommended for users; fine-tuning (example command shown in the synopsis above) or replacing a layer can be used instead. Different options apply to different types of training. +Read the [training documentation](https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00.html) for details.

+
+
+
+

OPTIONS

+
+
+
+'--debug_interval ' +
+
+

+ How often to display the alignment. (type:int default:0) +

+
+
+'--net_mode ' +
+
+

+ Controls network behavior. (type:int default:192) +

+
+
+'--perfect_sample_delay ' +
+
+

+ How many imperfect samples between perfect ones. (type:int default:0) +

+
+
+'--max_image_MB ' +
+
+

+ Max memory to use for images. (type:int default:6000) +

+
+
+'--append_index ' +
+
+

+ Index in continue_from Network at which to attach the new network defined by net_spec (type:int default:-1) +

+
+
+'--max_iterations ' +
+
+

+ If set, exit after this many iterations. A negative value is interpreted as epochs, 0 means infinite iterations. (type:int default:0) +

+
+
+'--target_error_rate ' +
+
+

+ Final error rate in percent. (type:double default:0.01) +

+
+
+'--weight_range ' +
+
+

+ Range of initial random weights. (type:double default:0.1) +

+
+
+'--learning_rate ' +
+
+

+ Weight factor for new deltas. (type:double default:0.001) +

+
+
+'--momentum ' +
+
+

+ Decay factor for repeating deltas. (type:double default:0.5) +

+
+
+'--adam_beta ' +
+
+

+ Decay factor for repeating deltas. (type:double default:0.999) +

+
+
+'--stop_training ' +
+
+

+ Just convert the training model to a runtime model. (type:bool default:false) +

+
+
+'--convert_to_int ' +
+
+

+ Convert the recognition model to an integer model. (type:bool default:false) +

+
+
+'--sequential_training ' +
+
+

+ Use the training files sequentially instead of round-robin. (type:bool default:false) +

+
+
+'--debug_network ' +
+
+

+ Get info on distribution of weight values (type:bool default:false) +

+
+
+'--randomly_rotate ' +
+
+

+ Train OSD and randomly turn training samples upside-down (type:bool default:false) +

+
+
+'--net_spec ' +
+
+

+ Network specification (type:string default:) +

+
+
+'--continue_from ' +
+
+

+ Existing model to extend (type:string default:) +

+
+
+'--model_output ' +
+
+

+ Basename for output models (type:string default:lstmtrain) +

+
+
+'--train_listfile ' +
+
+

+ File listing training files in lstmf training format. (type:string default:) +

+
+
+'--eval_listfile ' +
+
+

+ File listing eval files in lstmf training format. (type:string default:) +

+
+
+'--traineddata ' +
+
+

+ Starter traineddata with combined Dawgs/Unicharset/Recoder for language model (type:string default:) +

+
+
+'--old_traineddata ' +
+
+

+ When changing the character set, this specifies the traineddata with the old character set that is to be replaced (type:string default:) +

+
+
+
+
+
+

HISTORY

+
+

lstmtraining(1) was first made available for tesseract 4.00.00alpha.

+
+
+
+

RESOURCES

+ +
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/lstmtraining.exe b/lib/Tesseract-OCR/lstmtraining.exe new file mode 100644 index 0000000000000000000000000000000000000000..8baa7db509a65d9072ce56fe48b7ee4de773ce8d --- /dev/null +++ b/lib/Tesseract-OCR/lstmtraining.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd1e6d6e152e59150df595460fbe117a88377a86b3e0200ed2a29a8f7796a6b +size 9845144 diff --git a/lib/Tesseract-OCR/merge_unicharsets.1.html b/lib/Tesseract-OCR/merge_unicharsets.1.html new file mode 100644 index 0000000000000000000000000000000000000000..543b8d08544c3341a4ec9db5521716d6f3db4d59 --- /dev/null +++ b/lib/Tesseract-OCR/merge_unicharsets.1.html @@ -0,0 +1,833 @@ + + + + + +MERGE_UNICHARSETS(1) + + + + + +
+
+

SYNOPSIS

+
+

merge_unicharsets unicharset-in-1 … unicharset-in-n unicharset-out

+
+
+
+

DESCRIPTION

+
+

merge_unicharsets(1) is a simple tool to merge two or more unicharsets. +It could be used to create a combined unicharset for a script-level engine, +like the new Latin or Devanagari.

+
+
+
+

IN/OUT ARGUMENTS

+
+
+
+unicharset-in-1 +
+
+

+ (Input) The name of the first unicharset file to be merged. +

+
+
+unicharset-in-n +
+
+

+ (Input) The name of the nth unicharset file to be merged. +

+
+
+unicharset-out +
+
+

+ (Output) The name of the merged unicharset file. +

+
+
+
+
+
+

HISTORY

+
+

merge_unicharsets(1) was first made available for tesseract 4.00.00alpha.

+
+
+
+

RESOURCES

+ +
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/merge_unicharsets.exe b/lib/Tesseract-OCR/merge_unicharsets.exe new file mode 100644 index 0000000000000000000000000000000000000000..787ddf3081dbd8e18619a7672aa77b971d7e001c Binary files /dev/null and b/lib/Tesseract-OCR/merge_unicharsets.exe differ diff --git a/lib/Tesseract-OCR/mftraining.1.html b/lib/Tesseract-OCR/mftraining.1.html new file mode 100644 index 0000000000000000000000000000000000000000..718cbd1efacae92e0c4059ab0120a7dece845e55 --- /dev/null +++ b/lib/Tesseract-OCR/mftraining.1.html @@ -0,0 +1,847 @@ + + + + + +MFTRAINING(1) + + + + + +
+
+

SYNOPSIS

+
+

mftraining -U unicharset -O lang.unicharset FILE

+
+
+
+

DESCRIPTION

+
+

mftraining takes a list of .tr files, from which it generates the +files inttemp (the shape prototypes), shapetable, and pffmtable +(the number of expected features for each character). (A fourth file +called Microfeat is also written by this program, but it is not used.)

+
+
+
+

OPTIONS

+
+
+
+-U FILE +
+
+

+ (Input) The unicharset generated by unicharset_extractor(1) +

+
+
+-F font_properties_file +
+
+

+ (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: +

+
+
+
*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*
+
+
+
+-X xheights_file +
+
+

+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] +

+
+
+
*font_name* *xheight*
+
+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+-O FILE +
+
+

+ (Output) The output unicharset that will be given to combine_tessdata(1) +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +shapeclustering(1), unicharset(5)

+ +
+
+
+

COPYING

+
+

Copyright (C) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/mftraining.exe b/lib/Tesseract-OCR/mftraining.exe new file mode 100644 index 0000000000000000000000000000000000000000..495c8b74f6968979fa2caf74cc1a607bbae10096 --- /dev/null +++ b/lib/Tesseract-OCR/mftraining.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44752b052257e25b0729a2c94eef34ee6355a4c45d1a01697ef94115f08a74ad +size 5307936 diff --git a/lib/Tesseract-OCR/set_unicharset_properties.1.html b/lib/Tesseract-OCR/set_unicharset_properties.1.html new file mode 100644 index 0000000000000000000000000000000000000000..146dd96c2e2d731193395c26836a5ddbe6ce8807 --- /dev/null +++ b/lib/Tesseract-OCR/set_unicharset_properties.1.html @@ -0,0 +1,831 @@ + + + + + +SET_UNICHARSET_PROPERTIES(1) + + + + + +
+
+

SYNOPSIS

+
+

set_unicharset_properties --U input_unicharsetfile --script_dir /path/to/langdata --O output_unicharsetfile

+
+
+
+

DESCRIPTION

+
+

set_unicharset_properties(1) reads a unicharset file, puts the result in a UNICHARSET object, fills it with properties about the unichars it contains and writes the result back to another unicharset file.

+
+
+
+

OPTIONS

+
+
+
+--script_dir /path/to/langdata +
+
+

+ (Input) Specify the location of directory for universal script unicharsets and font xheights (type:string default:) +

+
+
+--U unicharsetfile +
+
+

+ (Input) Specify the location of the unicharset to load as input. +

+
+
+--O unicharsetfile +
+
+

+ (Output) Specify the location of the unicharset to be written with updated properties. +

+
+
+
+
+
+

HISTORY

+
+

set_unicharset_properties(1) was first made available for tesseract version 3.03.

+
+
+ +
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/set_unicharset_properties.exe b/lib/Tesseract-OCR/set_unicharset_properties.exe new file mode 100644 index 0000000000000000000000000000000000000000..389066e5487f6f954bd47731f0287240bcab89f7 --- /dev/null +++ b/lib/Tesseract-OCR/set_unicharset_properties.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea11735e228450b3e7d8639860c6f3a99c159bcf4c2e072fe9bb4cb99ca499c +size 6484416 diff --git a/lib/Tesseract-OCR/shapeclustering.1.html b/lib/Tesseract-OCR/shapeclustering.1.html new file mode 100644 index 0000000000000000000000000000000000000000..673f603201f455a89a2fcd3263286e0cac1cfdc2 --- /dev/null +++ b/lib/Tesseract-OCR/shapeclustering.1.html @@ -0,0 +1,850 @@ + + + + + +SHAPECLUSTERING(1) + + + + + +
+
+

SYNOPSIS

+
+

shapeclustering -D output_dir + -U unicharset -O mfunicharset + -F font_props -X xheights + FILE

+
+
+
+

DESCRIPTION

+
+

shapeclustering(1) takes extracted feature .tr files (generated by +tesseract(1) run in a special mode from box files) and produces a +file shapetable and an enhanced unicharset. This program is still +experimental, and is not required (yet) for training Tesseract.

+
+
+
+

OPTIONS

+
+
+
+-U FILE +
+
+

+ The unicharset generated by unicharset_extractor(1). +

+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+-F font_properties_file +
+
+

+ (Input) font properties file; each line has the following form, where each field other than the font name is 0 or 1: +

+
+
+
'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'
+
+
+
+-X xheights_file +
+
+

+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] +

+
+
+
'font_name' 'xheight'
+
+
+
+-O FILE +
+
+

+ The output unicharset that will be given to combine_tessdata(1). +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +unicharset(5)

+ +
+
+
+

COPYING

+
+

Copyright (C) Google, 2011 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/shapeclustering.exe b/lib/Tesseract-OCR/shapeclustering.exe new file mode 100644 index 0000000000000000000000000000000000000000..5069dcf34d08ccb46a47c5ad4021b332492e6b7a --- /dev/null +++ b/lib/Tesseract-OCR/shapeclustering.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b979ea98f0d8f969384980c92e795e0d4945529263c3e64fffca1d275e947403 +size 4986944 diff --git a/lib/Tesseract-OCR/tessdata/ScrollView.jar b/lib/Tesseract-OCR/tessdata/ScrollView.jar new file mode 100644 index 0000000000000000000000000000000000000000..ddf60d9339c5737ac2c54d491430353be629815d Binary files /dev/null and b/lib/Tesseract-OCR/tessdata/ScrollView.jar differ diff --git a/lib/Tesseract-OCR/tessdata/configs/alto b/lib/Tesseract-OCR/tessdata/configs/alto new file mode 100644 index 0000000000000000000000000000000000000000..0dd12a7a709c14e267fb7141cd9ef705102b8a3a --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/alto @@ -0,0 +1 @@ +tessedit_create_alto 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/ambigs.train b/lib/Tesseract-OCR/tessdata/configs/ambigs.train new file mode 100644 index 0000000000000000000000000000000000000000..23035a1904cfb8a2e5ad143ac638447bc1b04b4c --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/ambigs.train @@ -0,0 +1,7 @@ +tessedit_ambigs_training 1 +load_freq_dawg 0 +load_punc_dawg 0 +load_system_dawg 0 +load_number_dawg 0 +ambigs_debug_level 3 +load_fixed_length_dawgs 0 diff --git a/lib/Tesseract-OCR/tessdata/configs/api_config b/lib/Tesseract-OCR/tessdata/configs/api_config new file mode 100644 index 0000000000000000000000000000000000000000..5cd6ec0310213adbc59e5c48a49f858daf3cdc4f --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/api_config @@ -0,0 +1 @@ +tessedit_zero_rejection T diff --git a/lib/Tesseract-OCR/tessdata/configs/bigram b/lib/Tesseract-OCR/tessdata/configs/bigram new file mode 100644 index 0000000000000000000000000000000000000000..5d6c2d061f4a0bae8ab3b2270da8e6744a048d11 
--- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/bigram @@ -0,0 +1,5 @@ +load_bigram_dawg True +tessedit_enable_bigram_correction True +tessedit_bigram_debug 3 +save_raw_choices True +save_alt_choices True diff --git a/lib/Tesseract-OCR/tessdata/configs/box.train b/lib/Tesseract-OCR/tessdata/configs/box.train new file mode 100644 index 0000000000000000000000000000000000000000..d39f2687ef8c50f090895ba1f26367e25613a685 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/box.train @@ -0,0 +1,12 @@ +disable_character_fragments T +file_type .bl +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_resegment_from_boxes T +tessedit_train_from_boxes T +textord_no_rejects T diff --git a/lib/Tesseract-OCR/tessdata/configs/box.train.stderr b/lib/Tesseract-OCR/tessdata/configs/box.train.stderr new file mode 100644 index 0000000000000000000000000000000000000000..82754e9cc90b7da072c9074e7ee1492beaa72db6 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/box.train.stderr @@ -0,0 +1,13 @@ +file_type .bl +#tessedit_use_nn F +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_resegment_from_boxes T +tessedit_train_from_boxes T +#textord_repeat_extraction F +textord_no_rejects T diff --git a/lib/Tesseract-OCR/tessdata/configs/digits b/lib/Tesseract-OCR/tessdata/configs/digits new file mode 100644 index 0000000000000000000000000000000000000000..6a329f892910ae9dd7af1f9fe8f7a1d48378fd8b --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/digits @@ -0,0 +1 @@ +tessedit_char_whitelist 0123456789-. 
diff --git a/lib/Tesseract-OCR/tessdata/configs/get.images b/lib/Tesseract-OCR/tessdata/configs/get.images new file mode 100644 index 0000000000000000000000000000000000000000..7d00b613ffcbf7e4fa712d2c50a85c7643a027e3 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/get.images @@ -0,0 +1 @@ +tessedit_write_images T diff --git a/lib/Tesseract-OCR/tessdata/configs/hocr b/lib/Tesseract-OCR/tessdata/configs/hocr new file mode 100644 index 0000000000000000000000000000000000000000..5ab372eaf819b05bdd87ba419c874f6a1be4677b --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/hocr @@ -0,0 +1,2 @@ +tessedit_create_hocr 1 +hocr_font_info 0 diff --git a/lib/Tesseract-OCR/tessdata/configs/inter b/lib/Tesseract-OCR/tessdata/configs/inter new file mode 100644 index 0000000000000000000000000000000000000000..252f1a171a154f9ade798e210015a720af039d00 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/inter @@ -0,0 +1,2 @@ +interactive_display_mode T +tessedit_display_outwords T diff --git a/lib/Tesseract-OCR/tessdata/configs/kannada b/lib/Tesseract-OCR/tessdata/configs/kannada new file mode 100644 index 0000000000000000000000000000000000000000..c6ac105788137bc4e89821e94843ea86ed5b4564 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/kannada @@ -0,0 +1,4 @@ +textord_skewsmooth_offset 8 +textord_skewsmooth_offset2 8 +textord_merge_desc 0.5 +textord_no_rejects 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/linebox b/lib/Tesseract-OCR/tessdata/configs/linebox new file mode 100644 index 0000000000000000000000000000000000000000..bd9c114df65ddf13e640298075adb940225c5f96 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/linebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_line_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/logfile b/lib/Tesseract-OCR/tessdata/configs/logfile new file mode 100644 index 0000000000000000000000000000000000000000..a160f9be275a70fe3af1935fb8fe7af29efa8451 --- /dev/null +++ 
b/lib/Tesseract-OCR/tessdata/configs/logfile @@ -0,0 +1 @@ +debug_file tesseract.log diff --git a/lib/Tesseract-OCR/tessdata/configs/lstm.train b/lib/Tesseract-OCR/tessdata/configs/lstm.train new file mode 100644 index 0000000000000000000000000000000000000000..5ff37726211ab360ceead7c76a1b52613cc2f277 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/lstm.train @@ -0,0 +1,11 @@ +file_type .bl +textord_fast_pitch_test T +tessedit_zero_rejection T +tessedit_minimal_rejection F +tessedit_write_rep_codes F +edges_children_fix F +edges_childarea 0.65 +edges_boxarea 0.9 +tessedit_train_line_recognizer T +textord_no_rejects T +tessedit_init_config_only T diff --git a/lib/Tesseract-OCR/tessdata/configs/lstmbox b/lib/Tesseract-OCR/tessdata/configs/lstmbox new file mode 100644 index 0000000000000000000000000000000000000000..a6f2cedc504e9010eec3bfb0b1336b75ef0c28e5 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/lstmbox @@ -0,0 +1 @@ +tessedit_create_lstmbox 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/lstmdebug b/lib/Tesseract-OCR/tessdata/configs/lstmdebug new file mode 100644 index 0000000000000000000000000000000000000000..3fa3dee71aafe30913c1863a5e67529872984743 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/lstmdebug @@ -0,0 +1,4 @@ +stopper_debug_level 1 +classify_debug_level 1 +segsearch_debug_level 1 +language_model_debug_level 3 diff --git a/lib/Tesseract-OCR/tessdata/configs/makebox b/lib/Tesseract-OCR/tessdata/configs/makebox new file mode 100644 index 0000000000000000000000000000000000000000..3d90ac26f9542c6beac1082b2d900859906af8e9 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/makebox @@ -0,0 +1 @@ +tessedit_create_boxfile 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/pdf b/lib/Tesseract-OCR/tessdata/configs/pdf new file mode 100644 index 0000000000000000000000000000000000000000..59645d71ce52a143d819f2057c8c4e9ce2d46e40 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/pdf @@ -0,0 +1 @@ +tessedit_create_pdf 1 diff --git 
a/lib/Tesseract-OCR/tessdata/configs/quiet b/lib/Tesseract-OCR/tessdata/configs/quiet new file mode 100644 index 0000000000000000000000000000000000000000..35b59a9d41dd462c6d13b2301d4b2c31219c582f --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/quiet @@ -0,0 +1 @@ +debug_file /dev/null diff --git a/lib/Tesseract-OCR/tessdata/configs/rebox b/lib/Tesseract-OCR/tessdata/configs/rebox new file mode 100644 index 0000000000000000000000000000000000000000..f8342b4c2c7eb733e1d4078f32a0aa5aee677cc3 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/rebox @@ -0,0 +1,2 @@ +tessedit_resegment_from_boxes 1 +tessedit_make_boxes_from_boxes 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/strokewidth b/lib/Tesseract-OCR/tessdata/configs/strokewidth new file mode 100644 index 0000000000000000000000000000000000000000..e95b59263daf6d43f1b20682a4fa79d386484536 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/strokewidth @@ -0,0 +1,12 @@ +textord_show_blobs 0 +textord_debug_tabfind 3 +textord_tabfind_show_partitions 1 +textord_tabfind_show_initial_partitions 1 +textord_tabfind_show_columns 1 +textord_tabfind_show_blocks 1 +textord_tabfind_show_initialtabs 1 +textord_tabfind_show_finaltabs 1 +textord_tabfind_show_strokewidths 1 +textord_tabfind_show_vlines 0 +textord_tabfind_show_images 1 +tessedit_dump_pageseg_images 0 diff --git a/lib/Tesseract-OCR/tessdata/configs/tsv b/lib/Tesseract-OCR/tessdata/configs/tsv new file mode 100644 index 0000000000000000000000000000000000000000..dc52478177fd6fb7b1fe278e1374c2054f3e2442 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/tsv @@ -0,0 +1 @@ +tessedit_create_tsv 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/txt b/lib/Tesseract-OCR/tessdata/configs/txt new file mode 100644 index 0000000000000000000000000000000000000000..a0cc952977f0f3562a5c94011c13044ace865519 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/txt @@ -0,0 +1,3 @@ +# This config file should be used with other config files which create renderers. 
+# usage example: tesseract eurotext.tif eurotext txt hocr pdf +tessedit_create_txt 1 diff --git a/lib/Tesseract-OCR/tessdata/configs/unlv b/lib/Tesseract-OCR/tessdata/configs/unlv new file mode 100644 index 0000000000000000000000000000000000000000..d2e22f5b93585032eef94f22966329144ba44d6f --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/unlv @@ -0,0 +1,2 @@ +tessedit_write_unlv 1 +unlv_tilde_crunching T diff --git a/lib/Tesseract-OCR/tessdata/configs/wordstrbox b/lib/Tesseract-OCR/tessdata/configs/wordstrbox new file mode 100644 index 0000000000000000000000000000000000000000..38cd41cd60f89ec7ab3bc5161e094b20f06a3361 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/configs/wordstrbox @@ -0,0 +1 @@ +tessedit_create_wordstrbox 1 diff --git a/lib/Tesseract-OCR/tessdata/eng.traineddata b/lib/Tesseract-OCR/tessdata/eng.traineddata new file mode 100644 index 0000000000000000000000000000000000000000..b15334db4df434b6e9b7e8119fe96f85f855589b --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/eng.traineddata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4322bd2a7749724879683fc3912cb542f19906c83bcc1a52132556427170b2 +size 4113088 diff --git a/lib/Tesseract-OCR/tessdata/eng.user-patterns b/lib/Tesseract-OCR/tessdata/eng.user-patterns new file mode 100644 index 0000000000000000000000000000000000000000..5daba44df897f1c6d67caeb6d0414f7b55625fa1 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/eng.user-patterns @@ -0,0 +1,2 @@ +1-\d\d\d-GOOG-411 +www.\n\\\*.com diff --git a/lib/Tesseract-OCR/tessdata/eng.user-words b/lib/Tesseract-OCR/tessdata/eng.user-words new file mode 100644 index 0000000000000000000000000000000000000000..e0c5a630214ac69273e2b54107c62ed171fc50a0 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/eng.user-words @@ -0,0 +1,5 @@ +the +quick +brown +fox +jumped diff --git a/lib/Tesseract-OCR/tessdata/jaxb-api-2.3.1.jar b/lib/Tesseract-OCR/tessdata/jaxb-api-2.3.1.jar new file mode 100644 index 
0000000000000000000000000000000000000000..45658654712b88d45c9464286ffc2fcb07036bdf Binary files /dev/null and b/lib/Tesseract-OCR/tessdata/jaxb-api-2.3.1.jar differ diff --git a/lib/Tesseract-OCR/tessdata/osd.traineddata b/lib/Tesseract-OCR/tessdata/osd.traineddata new file mode 100644 index 0000000000000000000000000000000000000000..f4f02c3c55ddbbfdb31fe2687f2852e17fda3d9b --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/osd.traineddata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf5d576fcc47564f11265841e5ca839001e7e6f38ff7f7aacf46d15a96b00ff +size 10562727 diff --git a/lib/Tesseract-OCR/tessdata/pdf.ttf b/lib/Tesseract-OCR/tessdata/pdf.ttf new file mode 100644 index 0000000000000000000000000000000000000000..d1472b20ef1aebbf5e11573867e9ac13873681b9 Binary files /dev/null and b/lib/Tesseract-OCR/tessdata/pdf.ttf differ diff --git a/lib/Tesseract-OCR/tessdata/piccolo2d-core-3.0.1.jar b/lib/Tesseract-OCR/tessdata/piccolo2d-core-3.0.1.jar new file mode 100644 index 0000000000000000000000000000000000000000..df84ed5cc8059d640876d3be0ac4c11af0bf0853 Binary files /dev/null and b/lib/Tesseract-OCR/tessdata/piccolo2d-core-3.0.1.jar differ diff --git a/lib/Tesseract-OCR/tessdata/piccolo2d-extras-3.0.1.jar b/lib/Tesseract-OCR/tessdata/piccolo2d-extras-3.0.1.jar new file mode 100644 index 0000000000000000000000000000000000000000..daf51c0ebbea07291c85cab9bc81b19ef0e99efc Binary files /dev/null and b/lib/Tesseract-OCR/tessdata/piccolo2d-extras-3.0.1.jar differ diff --git a/lib/Tesseract-OCR/tessdata/spa.traineddata b/lib/Tesseract-OCR/tessdata/spa.traineddata new file mode 100644 index 0000000000000000000000000000000000000000..edd70875a4d356bc89f69038ee8a4f3fc9cc62ee --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/spa.traineddata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2e04d02774a18f01bed44b1111f2cd7f3ba7ac9dc4373cd3f898a40ea6b464 +size 2294433 diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/batch 
b/lib/Tesseract-OCR/tessdata/tessconfigs/batch new file mode 100644 index 0000000000000000000000000000000000000000..a681e4a443fa21ce6f32bbcf0334af3433888566 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/batch @@ -0,0 +1 @@ +# No content needed as all defaults are correct. diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/batch.nochop b/lib/Tesseract-OCR/tessdata/tessconfigs/batch.nochop new file mode 100644 index 0000000000000000000000000000000000000000..ebaab9438e309b4dfdfd8428676170ab2b64a858 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/batch.nochop @@ -0,0 +1,2 @@ +chop_enable 0 +wordrec_enable_assoc 0 diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/matdemo b/lib/Tesseract-OCR/tessdata/tessconfigs/matdemo new file mode 100644 index 0000000000000000000000000000000000000000..c34567be7565d519806076b795fceff9fdad1477 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/matdemo @@ -0,0 +1,7 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +classify_enable_adaptive_debugger 1 +matcher_debug_flags 6 +matcher_debug_level 1 diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/msdemo b/lib/Tesseract-OCR/tessdata/tessconfigs/msdemo new file mode 100644 index 0000000000000000000000000000000000000000..9c1184a0c84bde5f58ea74a1f316af9eb2574b52 --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/msdemo @@ -0,0 +1,12 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +classify_enable_adaptive_debugger 1 +matcher_debug_flags 6 +matcher_debug_level 1 + +wordrec_display_splits 0 +wordrec_display_all_blobs 1 +wordrec_display_segmentations 2 +classify_debug_level 1 diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/nobatch b/lib/Tesseract-OCR/tessdata/tessconfigs/nobatch new file mode 100644 index 
0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/nobatch @@ -0,0 +1 @@ + diff --git a/lib/Tesseract-OCR/tessdata/tessconfigs/segdemo b/lib/Tesseract-OCR/tessdata/tessconfigs/segdemo new file mode 100644 index 0000000000000000000000000000000000000000..eaff69f54b9ed62d49987634a5fba2c79cccf3cf --- /dev/null +++ b/lib/Tesseract-OCR/tessdata/tessconfigs/segdemo @@ -0,0 +1,9 @@ +################################################# +# Adaptive Matcher Using PreAdapted Templates +################################################# + +wordrec_display_splits 0 +wordrec_display_all_blobs 1 +wordrec_display_segmentations 2 +classify_debug_level 1 +stopper_debug_level 1 diff --git a/lib/Tesseract-OCR/tesseract-uninstall.exe b/lib/Tesseract-OCR/tesseract-uninstall.exe new file mode 100644 index 0000000000000000000000000000000000000000..194103b8ed60acfb3063710c1464884b081967b0 Binary files /dev/null and b/lib/Tesseract-OCR/tesseract-uninstall.exe differ diff --git a/lib/Tesseract-OCR/tesseract.1.html b/lib/Tesseract-OCR/tesseract.1.html new file mode 100644 index 0000000000000000000000000000000000000000..93951ed6992f5ddb6d95063e458aabb645e1815d --- /dev/null +++ b/lib/Tesseract-OCR/tesseract.1.html @@ -0,0 +1,1387 @@ + + + + + +TESSERACT(1) + + + + + +
+
+

SYNOPSIS

+
+

tesseract FILE OUTPUTBASE [OPTIONS]… [CONFIGFILE]…

+
+
+
+

DESCRIPTION

+
+

tesseract(1) is a commercial quality OCR engine originally developed at HP +between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by +UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed +at Google since then.

+
+
+
+

IN/OUT ARGUMENTS

+
+
+
+FILE +
+
+

+ The name of the input file. + This can either be an image file or a text file.
+ Most image file formats (anything readable by Leptonica) are supported.
+ A text file lists the names of all input images (one image name per line). + The results will be combined in a single file for each output file format + (txt, pdf, hocr, xml).
+ If FILE is stdin or - then the standard input is used. +

+
+
+OUTPUTBASE +
+
+

+ The basename of the output file (to which the appropriate extension + will be appended). By default the output will be a text file + with .txt added to the basename unless there are one or more + parameters set which explicitly specify the desired output.
+ If OUTPUTBASE is stdout or - then the standard output is used. +

+
+
+
+
+
+

OPTIONS

+
+
+
+-c CONFIGVAR=VALUE +
+
+

+ Set value for parameter CONFIGVAR to VALUE. Multiple -c arguments are allowed. +

+
+
+--dpi N +
+
+

+ Specify the resolution N in DPI for the input image(s). + A typical value for N is 300. Without this option, + the resolution is read from the metadata included in the image. + If an image does not include that information, Tesseract tries to guess it. +

+
+
+-l LANG +
+
+-l SCRIPT +
+
+

+ The language or script to use. + If none is specified, eng (English) is assumed. + Multiple languages may be specified, separated by plus characters. + Tesseract uses 3-character ISO 639-2 language codes + (see LANGUAGES AND SCRIPTS). +

+
+
+--psm N +
+
+

+ Set Tesseract to only run a subset of layout analysis and assume + a certain form of image. The options for N are: +

+
+
+
0 = Orientation and script detection (OSD) only.
+1 = Automatic page segmentation with OSD.
+2 = Automatic page segmentation, but no OSD, or OCR. (not implemented)
+3 = Fully automatic page segmentation, but no OSD. (Default)
+4 = Assume a single column of text of variable sizes.
+5 = Assume a single uniform block of vertically aligned text.
+6 = Assume a single uniform block of text.
+7 = Treat the image as a single text line.
+8 = Treat the image as a single word.
+9 = Treat the image as a single word in a circle.
+10 = Treat the image as a single character.
+11 = Sparse text. Find as much text as possible in no particular order.
+12 = Sparse text with OSD.
+13 = Raw line. Treat the image as a single text line,
+     bypassing hacks that are Tesseract-specific.
+
+
+
+--oem N +
+
+

+ Specify OCR Engine mode. The options for N are: +

+
+
+
0 = Original Tesseract only.
+1 = Neural nets LSTM only.
+2 = Tesseract + LSTM.
+3 = Default, based on what is available.
+
+
+
+--tessdata-dir PATH +
+
+

+ Specify the location of tessdata path. +

+
+
+--user-patterns FILE +
+
+

+ Specify the location of user patterns file. +

+
+
+--user-words FILE +
+
+

+ Specify the location of user words file. +

+
+
+CONFIGFILE +
+
+

+ The name of a config to use. The name can be a file in tessdata/configs + or tessdata/tessconfigs, or an absolute or relative file path. + A config is a plain text file which contains a list of parameters and + their values, one per line, with a space separating parameter from value.
+ Interesting config files include: +

+
    +
  • +

    +alto — Output in ALTO format (OUTPUTBASE.xml). +

    +
  • +
  • +

    +hocr — Output in hOCR format (OUTPUTBASE.hocr). +

    +
  • +
  • +

    +pdf — Output PDF (OUTPUTBASE.pdf). +

    +
  • +
  • +

    +tsv — Output TSV (OUTPUTBASE.tsv). +

    +
  • +
  • +

    +txt — Output plain text (OUTPUTBASE.txt). +

    +
  • +
  • +

    +get.images — Write processed input images to file (OUTPUTBASE.processedPAGENUMBER.tif). +

    +
  • +
  • +

    +logfile — Redirect debug messages to file (tesseract.log). +

    +
  • +
  • +

    +lstm.train — Output files used by LSTM training (OUTPUTBASE.lstmf). +

    +
  • +
  • +

    +makebox — Write box file (OUTPUTBASE.box). +

    +
  • +
  • +

    +quiet — Redirect debug messages to /dev/null. +

    +
  • +
+
+
+

It is possible to select several config files, for example +tesseract image.png demo alto hocr pdf txt will create four output files +demo.xml, demo.hocr, demo.pdf and demo.txt with the OCR results.

+

Nota bene: The options -l LANG, -l SCRIPT and --psm N +must occur before any CONFIGFILE.

+
+
+
+

SINGLE OPTIONS

+
+
+
+-h, --help +
+
+

+ Show help message. +

+
+
+--help-extra +
+
+

+ Show extra help for advanced users. +

+
+
+--help-psm +
+
+

+ Show page segmentation modes. +

+
+
+--help-oem +
+
+

+ Show OCR Engine modes. +

+
+
+-v, --version +
+
+

+ Returns the current version of the tesseract(1) executable. +

+
+
+--list-langs +
+
+

+ List available languages for tesseract engine. + Can be used with --tessdata-dir PATH. +

+
+
+--print-parameters +
+
+

+ Print tesseract parameters. +

+
+
+
+
+
+

LANGUAGES AND SCRIPTS

+
+

To recognize some text with Tesseract, it is normally necessary to specify +the language(s) or script(s) of the text (unless it is English text which is +supported by default) using -l LANG or -l SCRIPT.

+

Selecting a language automatically also selects the language specific +character set and dictionary (word list).

+

Selecting a script typically selects all characters of that script +which can be from different languages. The dictionary which is included +also contains a mix from different languages. +In most cases, a script also supports English. +So it is possible to recognize a language that has not been specifically +trained for by using traineddata for the script it is written in.

+

More than one language or script may be specified by using +. +Example: tesseract myimage.png myimage -l eng+deu+fra.

+

https://github.com/tesseract-ocr/tessdata_fast provides fast language and +script models which are also part of Linux distributions.

+

For Tesseract 4, tessdata_fast includes traineddata files for the +following languages:

+

afr (Afrikaans), +amh (Amharic), +ara (Arabic), +asm (Assamese), +aze (Azerbaijani), +aze_cyrl (Azerbaijani - Cyrillic), +bel (Belarusian), +ben (Bengali), +bod (Tibetan), +bos (Bosnian), +bre (Breton), +bul (Bulgarian), +cat (Catalan; Valencian), +ceb (Cebuano), +ces (Czech), +chi_sim (Chinese simplified), +chi_tra (Chinese traditional), +chr (Cherokee), +cos (Corsican), +cym (Welsh), +dan (Danish), +deu (German), +div (Dhivehi), +dzo (Dzongkha), +ell (Greek, Modern, 1453-), +eng (English), +enm (English, Middle, 1100-1500), +epo (Esperanto), +equ (Math / equation detection module), +est (Estonian), +eus (Basque), +fas (Persian), +fao (Faroese), +fil (Filipino), +fin (Finnish), +fra (French), +frk (Frankish), +frm (French, Middle, ca.1400-1600), +fry (West Frisian), +gla (Scottish Gaelic), +gle (Irish), +glg (Galician), +grc (Greek, Ancient, to 1453), +guj (Gujarati), +hat (Haitian; Haitian Creole), +heb (Hebrew), +hin (Hindi), +hrv (Croatian), +hun (Hungarian), +hye (Armenian), +iku (Inuktitut), +ind (Indonesian), +isl (Icelandic), +ita (Italian), +ita_old (Italian - Old), +jav (Javanese), +jpn (Japanese), +kan (Kannada), +kat (Georgian), +kat_old (Georgian - Old), +kaz (Kazakh), +khm (Central Khmer), +kir (Kirghiz; Kyrgyz), +kmr (Kurdish Kurmanji), +kor (Korean), +kor_vert (Korean vertical), +lao (Lao), +lat (Latin), +lav (Latvian), +lit (Lithuanian), +ltz (Luxembourgish), +mal (Malayalam), +mar (Marathi), +mkd (Macedonian), +mlt (Maltese), +mon (Mongolian), +mri (Maori), +msa (Malay), +mya (Burmese), +nep (Nepali), +nld (Dutch; Flemish), +nor (Norwegian), +oci (Occitan post 1500), +ori (Oriya), +osd (Orientation and script detection module), +pan (Panjabi; Punjabi), +pol (Polish), +por (Portuguese), +pus (Pushto; Pashto), +que (Quechua), +ron (Romanian; Moldavian; Moldovan), +rus (Russian), +san (Sanskrit), +sin (Sinhala; Sinhalese), +slk (Slovak), +slv (Slovenian), +snd (Sindhi), +spa (Spanish; Castilian), +spa_old (Spanish; Castilian - Old), +sqi (Albanian), 
+srp (Serbian), +srp_latn (Serbian - Latin), +sun (Sundanese), +swa (Swahili), +swe (Swedish), +syr (Syriac), +tam (Tamil), +tat (Tatar), +tel (Telugu), +tgk (Tajik), +tha (Thai), +tir (Tigrinya), +ton (Tonga), +tur (Turkish), +uig (Uighur; Uyghur), +ukr (Ukrainian), +urd (Urdu), +uzb (Uzbek), +uzb_cyrl (Uzbek - Cyrillic), +vie (Vietnamese), +yid (Yiddish), +yor (Yoruba)

+

To use a non-standard language pack named foo.traineddata, set the +TESSDATA_PREFIX environment variable so the file can be found at +TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the +argument -l foo.

+

For Tesseract 4, tessdata_fast includes traineddata files for the +following scripts:

+

Arabic, +Armenian, +Bengali, +Canadian_Aboriginal, +Cherokee, +Cyrillic, +Devanagari, +Ethiopic, +Fraktur, +Georgian, +Greek, +Gujarati, +Gurmukhi, +HanS (Han simplified), +HanS_vert (Han simplified, vertical), +HanT (Han traditional), +HanT_vert (Han traditional, vertical), +Hangul, +Hangul_vert (Hangul vertical), +Hebrew, +Japanese, +Japanese_vert (Japanese vertical), +Kannada, +Khmer, +Lao, +Latin, +Malayalam, +Myanmar, +Oriya (Odia), +Sinhala, +Syriac, +Tamil, +Telugu, +Thaana, +Thai, +Tibetan, +Vietnamese.

+

The same languages and scripts are available from +https://github.com/tesseract-ocr/tessdata_best. +tessdata_best provides slow language and script models. +These models are needed for training. They also can give better OCR results, +but the recognition takes much more time.

+

Both tessdata_fast and tessdata_best only support the LSTM OCR engine.

+

There is a third repository, https://github.com/tesseract-ocr/tessdata, +with models which support both the Tesseract 3 legacy OCR engine and the +Tesseract 4 LSTM OCR engine.

+
+
+
+

CONFIG FILES AND AUGMENTING WITH USER DATA

+
+

Tesseract config files consist of lines with parameter-value pairs (space +separated). The parameters are documented as flags in the source code like +the following one in tesseractclass.h:

+

STRING_VAR_H(tessedit_char_blacklist, "", + "Blacklist of chars not to recognize");

+

These parameters may enable or disable various features of the engine, and +may cause it to load (or not load) various data. For instance, let’s suppose +you want to OCR in English, but suppress the normal dictionary and load an +alternative word list and an alternative list of patterns — these two files +are the most commonly used extra data files.

+

If your language pack is in /path/to/eng.traineddata and the hocr config +is in /path/to/configs/hocr then create three new files:

+

/path/to/eng.user-words:

+
+
the
+quick
+brown
+fox
+jumped
+
+
+

/path/to/eng.user-patterns:

+
+
1-\d\d\d-GOOG-411
+www.\n\\\*.com
+
+
+

/path/to/configs/bazaar:

+
+
load_system_dawg     F
+load_freq_dawg       F
+user_words_suffix    user-words
+user_patterns_suffix user-patterns
+
+
+

Now, if you pass the word bazaar as a CONFIGFILE to +Tesseract, Tesseract will not bother loading the system dictionary nor +the dictionary of frequent words and will load and use the eng.user-words +and eng.user-patterns files you provided. The former is a simple word list, +one per line. The format of the latter is documented in dict/trie.h +on read_pattern_list().

+
+
+
+

ENVIRONMENT VARIABLES

+
+
+
+TESSDATA_PREFIX +
+
+

+ If the TESSDATA_PREFIX is set to a path, then that path is used to + find the tessdata directory with language and script recognition + models and config files. + Using --tessdata-dir PATH is the recommended alternative. +

+
+
+OMP_THREAD_LIMIT +
+
+

+ If the tesseract executable was built with multithreading support, + it will normally use four CPU cores for the OCR process. While this + can be faster for a single image, it gives bad performance if the host + computer provides fewer than four CPU cores or if OCR is performed on many images. + Only a single CPU core is used with OMP_THREAD_LIMIT=1. +

+
+
+
+
+
+

HISTORY

+
+

The engine was developed at Hewlett Packard Laboratories Bristol and at +Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more +changes made in 1996 to port to Windows, and some C++izing in 1998. A +lot of the code was written in C, and then some more was written in C++. +The C++ code makes heavy use of a list system using macros. This predates +STL, was portable before STL, and is more efficient than STL lists, but has +the big negative that if you do get a segmentation violation, it is hard to +debug.

+

Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability +to train Tesseract.

+

Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy. +See https://github.com/tesseract-ocr/docs/blob/main/AT-1995.pdf. +Since Tesseract 2.00, +scripts are now included to allow anyone to reproduce some of these tests. +See https://tesseract-ocr.github.io/tessdoc/TestingTesseract.html for more +details.

+

Tesseract 3.00 added a number of new languages, including Chinese, Japanese, +and Korean. It also introduced a new, single-file based system of managing +language data.

+

Tesseract 3.02 added BiDirectional text support, the ability to recognize +multiple languages in a single image, and improved layout analysis.

+

Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused +on line recognition, but also still supports the legacy Tesseract OCR engine of +Tesseract 3 which works by recognizing character patterns. Compatibility with +Tesseract 3 is enabled by --oem 0. This also needs traineddata files which +support the legacy engine, for example those from the tessdata repository +(https://github.com/tesseract-ocr/tessdata).

+

For further details, see the release notes in the Tesseract documentation +(https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html).

+
+
+ +
+

SEE ALSO

+
+

ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), +shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), +unicharset_extractor(1), wordlist2dawg(1)

+
+
+
+

AUTHOR

+
+

Tesseract development was led at Hewlett-Packard and Google by Ray Smith. +The development team has included:

+

Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger, +Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke, +Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle, +Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel +Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh +Lloyd, Shobhit Saxena, and Thomas Kielbus.

+ +
+
+
+

COPYING

+
+

Licensed under the Apache License, Version 2.0

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/tesseract.exe b/lib/Tesseract-OCR/tesseract.exe new file mode 100644 index 0000000000000000000000000000000000000000..4fcd37a2b56fecb39e6542477f2bcfe7f86a73a8 --- /dev/null +++ b/lib/Tesseract-OCR/tesseract.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be192b0d8906bf9c65fd94b5c1f7ba362f9a301feb55516b20d4e67af479e02a +size 1241664 diff --git a/lib/Tesseract-OCR/text2image.1.html b/lib/Tesseract-OCR/text2image.1.html new file mode 100644 index 0000000000000000000000000000000000000000..28e850a035b6af2ffe07c2e91e2ef6c350d22acc --- /dev/null +++ b/lib/Tesseract-OCR/text2image.1.html @@ -0,0 +1,1121 @@ + + + + + +TEXT2IMAGE(1) + + + + + +
+
+

SYNOPSIS

+
+

text2image --text FILE --outputbase PATH --fonts_dir PATH [OPTION]

+
+
+
+

DESCRIPTION

+
+

text2image(1) generates OCR training pages. Given a text file it outputs an image with a given font and degradation.

+
+
+
+

OPTIONS

+
+
+
+--text FILE +
+
+

+ File name of text input to use for creating synthetic training data. (type:string default:) +

+
+
+--outputbase FILE +
+
+

+ Basename for output image/box file (type:string default:) +

+
+
+--fontconfig_tmpdir PATH +
+
+

+ Overrides fontconfig default temporary dir (type:string default:/tmp) +

+
+
+--fonts_dir PATH +
+
+

+ If empty, the system default is used. Otherwise it overrides the system default font location (type:string default:) +

+
+
+--font FONTNAME +
+
+

+ Font description name to use (type:string default:Arial) +

+
+
+--writing_mode MODE +
+
+

+ Specify one of the following writing modes. + horizontal : Render regular horizontal text. (default) + vertical : Render vertical text. Glyph orientation is selected by Pango. + vertical-upright : Render vertical text. Glyph orientation is set to be upright. (type:string default:horizontal) +

+
+
+--tlog_level INT +
+
+

+ Minimum logging level for tlog() output (type:int default:0) +

+
+
+--max_pages INT +
+
+

+ Maximum number of pages to output (0=unlimited) (type:int default:0) +

+
+
+--degrade_image BOOL +
+
+

+ Degrade rendered image with speckle noise, dilation/erosion and rotation (type:bool default:true) +

+
+
+--rotate_image BOOL +
+
+

+ Rotate the image in a random way. (type:bool default:true) +

+
+
+--strip_unrenderable_words BOOL +
+
+

+ Remove unrenderable words from source text (type:bool default:true) +

+
+
+--ligatures BOOL +
+
+

+ Rebuild and render ligatures (type:bool default:false) +

+
+
+--exposure INT +
+
+

+ Exposure level in photocopier (type:int default:0) +

+
+
+--resolution INT +
+
+

+ Pixels per inch (type:int default:300) +

+
+
+--xsize INT +
+
+

+ Width of output image (type:int default:3600) +

+
+
+--ysize INT +
+
+

+ Height of output image (type:int default:4800) +

+
+
+--margin INT +
+
+

+ Margin round edges of image (type:int default:100) +

+
+
+--ptsize INT +
+
+

+ Size of printed text (type:int default:12) +

+
+
+--leading INT +
+
+

+ Inter-line space (in pixels) (type:int default:12) +

+
+
+--box_padding INT +
+
+

+ Padding around produced bounding boxes (type:int default:0) +

+
+
+--char_spacing DOUBLE +
+
+

+ Inter-character space in ems (type:double default:0) +

+
+
+--underline_start_prob DOUBLE +
+
+

+ Fraction of words to underline (value in [0,1]) (type:double default:0) +

+
+
+--underline_continuation_prob DOUBLE +
+
+

+ Fraction of words to underline (value in [0,1]) (type:double default:0) +

+
+
+--render_ngrams BOOL +
+
+

+ Put each space-separated entity from the input file into one bounding box. The ngrams in the input file will be randomly permuted before rendering (so that there is sufficient variety of characters on each line). (type:bool default:false) +

+
+
+--output_word_boxes BOOL +
+
+

+ Output word bounding boxes instead of character boxes. This is used for Cube training, and implied by --render_ngrams. (type:bool default:false) +

+
+
+--unicharset_file FILE +
+
+

+ File with characters in the unicharset. If --render_ngrams is true and --unicharset_file is specified, ngrams with characters that are not in unicharset will be omitted (type:string default:) +

+
+
+--bidirectional_rotation BOOL +
+
+

+ Rotate the generated characters both ways. (type:bool default:false) +

+
+
+--only_extract_font_properties BOOL +
+
+

+ Assumes that the input file contains a list of ngrams. Renders each ngram, extracts spacing properties and records them in output_base/[font_name].fontinfo file. (type:bool default:false) +

+
+
+
+
+
+

Use these flags to output zero-padded, square individual character images

+
+
+
+--output_individual_glyph_images BOOL +
+
+

+ If true also outputs individual character images (type:bool default:false) +

+
+
+--glyph_resized_size INT +
+
+

+ Each glyph is square with this side length in pixels (type:int default:0) +

+
+
+--glyph_num_border_pixels_to_pad INT +
+
+

+ Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad (type:int default:0) +

+
+
+
+
+
+

Use these flags to find fonts that can render a given text

+
+
+
+--find_fonts BOOL +
+
+

+ Search for all fonts that can render the text (type:bool default:false) +

+
+
+--render_per_font BOOL +
+
+

+ If find_fonts==true, render each font to its own image. Image filenames are of the form output_name.font_name.tif (type:bool default:true) +

+
+
+--min_coverage DOUBLE +
+
+

+ If find_fonts==true, the minimum coverage the font has of the characters in the text file to include it, between 0 and 1. (type:double default:1) +

+
+
+

Example Usage: +``` +text2image --find_fonts \ +--fonts_dir /usr/share/fonts \ +--text ../langdata/hin/hin.training_text \ +--min_coverage .9 \ +--render_per_font \ +--outputbase ../langdata/hin/hin \ +|& grep raw | sed -e s/ :.*/" \\/g | sed -e s/^/ "/ >../langdata/hin/fontslist.txt +```

+
+
+
+

SINGLE OPTIONS

+
+
+
+--list_available_fonts BOOL +
+
+

+ List available fonts and quit. (type:bool default:false) +

+
+
+
+
+
+

HISTORY

+
+

text2image(1) was first made available for tesseract 3.03.

+
+
+
+

RESOURCES

+ +
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/text2image.exe b/lib/Tesseract-OCR/text2image.exe new file mode 100644 index 0000000000000000000000000000000000000000..961f0dbe678d4d5f9a3dc27b4e887c77a7c18e7c --- /dev/null +++ b/lib/Tesseract-OCR/text2image.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ebcea4b670633e081e896de9a48e891352379493861ab859d1cfa58cd1036a +size 9976808 diff --git a/lib/Tesseract-OCR/unicharambigs.5.html b/lib/Tesseract-OCR/unicharambigs.5.html new file mode 100644 index 0000000000000000000000000000000000000000..9d0c91ef45f7c04c6850ed6da4b3fd062f6c3278 --- /dev/null +++ b/lib/Tesseract-OCR/unicharambigs.5.html @@ -0,0 +1,893 @@ + + + + + +UNICHARAMBIGS(5) + + + + + +
+
+

NAME

+
+

unicharambigs - Tesseract unicharset ambiguities

+
+
+
+

DESCRIPTION

+
+

The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) +is used by Tesseract to represent possible ambiguities between characters, +or groups of characters.

+

The file contains a number of lines, laid out as follows:

+
+
+
[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num]
+
+
+ + + + + + + + + + + + + + + + + + + + +
+Field one +
+
+

+the number of characters contained in field two +

+
+Field two +
+
+

+the character sequence to be replaced +

+
+Field three +
+
+

+the number of characters contained in field four +

+
+Field four +
+
+

+the character sequence used to replace field two +

+
+Field five +
+
+

+contains either 1 or 0. 1 denotes a mandatory +replacement, 0 denotes an optional replacement. +

+
+

Characters appearing in fields two and four should appear in +unicharset. The numbers in fields one and three refer to the +number of unichars (not bytes).

+
+
+
+

EXAMPLE

+
+
+
+
v1
+2       ' '     1       "     1
+1       m       2       r n   0
+3       i i i   1       m     0
+
+

The first line is a version identifier. +In this example, all instances of the 2 character sequence '' will +always be replaced by the 1 character sequence "; a 1 character +sequence m may be replaced by the 2 character sequence rn, and +the 3 character sequence iii may be replaced by the 1 character +sequence m.

+

Version 3.03 and on supports a new, simpler format for the unicharambigs +file:

+
+
+
v2
+'' " 1
+m rn 0
+iii m 0
+
+

In this format, the "error" and "correction" are simple UTF-8 strings +separated by a space, and, after another space, the same type specifier +as v1 (0 for optional and 1 for mandatory substitution). Note the downside +of this simpler format is that Tesseract has to encode the UTF-8 strings +into the components of the unicharset. In complex scripts, this encoding +may be ambiguous. In this case, the encoding is chosen so as to use the +fewest UTF-8 characters for each component, i.e. the shortest unicharset +components will make up the encoding.

+
+
+
+

HISTORY

+
+

The unicharambigs file first appeared in Tesseract 3.00; prior to that, a +similar format, called DangAmbigs (dangerous ambiguities) was used: the +format was almost identical, except only mandatory replacements could be +specified, and field 5 was absent.

+
+
+
+

BUGS

+
+

This is a documentation "bug": it’s not currently clear what should be done +in the case of ligatures (such as fi) which may also appear as regular +letters in the unicharset.

+
+
+ +
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/unicharset.5.html b/lib/Tesseract-OCR/unicharset.5.html new file mode 100644 index 0000000000000000000000000000000000000000..8692456a395efc5157dfff2eedd3b944c0815e1f --- /dev/null +++ b/lib/Tesseract-OCR/unicharset.5.html @@ -0,0 +1,965 @@ + + + + + +UNICHARSET(5) + + + + + +
+
+

DESCRIPTION

+
+

Tesseract’s unicharset file contains information on each symbol +(unichar) the Tesseract OCR engine is trained to recognize.

+

A unicharset file (e.g. eng.unicharset) is distributed as part of a +Tesseract language pack (e.g. eng.traineddata). For information on +extracting the unicharset file, see combine_tessdata(1).

+

The first line of a unicharset file contains the number of unichars in +the file. After this line, each subsequent line provides information for +a single unichar. The first such line contains a placeholder reserved for +the space character. Each unichar is referred to within Tesseract by its +Unichar ID, which is the line number (minus 1) within the unicharset file. +Therefore, space gets unichar 0.

+

Each unichar line in the unicharset file (v2+) may have four space-separated fields:

+
+
+
'character' 'properties' 'script' 'id'
+
+

Starting with Tesseract v3.02, more information may be given for each unichar:

+
+
+
'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'
+
+

Entries:

+
+
+character +
+
+

+The UTF-8 encoded string to be produced for this unichar. +

+
+
+properties +
+
+

+An integer mask of character properties, one per bit. + From least to most significant bit, these are: isalpha, islower, isupper, + isdigit, ispunctuation. +

+
+
+glyph_metrics +
+
+

+Ten comma-separated integers representing various standards + for where this glyph is to be found within a baseline-normalized coordinate + system where 128 is normalized to x-height. +

+
    +
  • +

    +min_bottom, max_bottom: the ranges where the bottom of the character can + be found. +

    +
  • +
  • +

    +min_top, max_top: the ranges where the top of the character may be found. +

    +
  • +
  • +

    +min_width, max_width: horizontal width of the character. +

    +
  • +
  • +

    +min_bearing, max_bearing: how far from the usual start position does the + leftmost part of the character begin. +

    +
  • +
  • +

    +min_advance, max_advance: how far from the printer’s cell left do we + advance to begin the next character. +

    +
  • +
+
+
+script +
+
+

+Name of the script (Latin, Common, Greek, Cyrillic, Han, null). +

+
+
+other_case +
+
+

+The Unichar ID of the other case version of this character + (upper or lower). +

+
+
+direction +
+
+

+The Unicode BiDi direction of this character, as defined by + ICU’s enum UCharDirection. (0 = Left to Right, 1 = Right to Left, + 2 = European Number…) +

+
+
+mirror +
+
+

+The Unichar ID of the BiDirectional mirror of this character. + For example the mirror of open paren is close paren, but Latin Capital C + has no mirror, so it remains a Latin Capital C. +

+
+
+normed_form +
+
+

+The UTF-8 representation of a "normalized form" of this unichar + for the purpose of blaming a module for errors given ground truth text. + For instance, a left or right single quote may normalize to an ASCII quote. +

+
+
+
+
+
+

EXAMPLE (v2)

+
+
+
+
; 10 Common 46
+b 3 Latin 59
+W 5 Latin 40
+7 8 Common 66
+= 0 Common 93
+
+

";" is a punctuation character. Its properties are thus represented by the +binary number 10000 (10 in hexadecimal).

+

"b" is an alphabetic character and a lower case character. Its properties are +thus represented by the binary number 00011 (3 in hexadecimal).

+

"W" is an alphabetic character and an upper case character. Its properties are +thus represented by the binary number 00101 (5 in hexadecimal).

+

"7" is just a digit. Its properties are thus represented by the binary number +01000 (8 in hexadecimal).

+

"=" is not punctuation nor a digit nor an alphabetic character. Its properties +are thus represented by the binary number 00000 (0 in hexadecimal).

+

Japanese or Chinese alphabetic character properties are represented by the +binary number 00001 (1 in hexadecimal): they are alphabetic, but neither +upper nor lower case.

+
+
+
+

EXAMPLE (v3.02)

+
+
+
+
110
+NULL 0 NULL 0
+N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
+Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
+1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
+9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
+a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
+. . .
+
+
+
+
+

CAVEATS

+
+

Although the unicharset reader maintains the ability to read unicharsets +of older formats and will assign default values to missing fields, +the accuracy will be degraded.

+

Further, most other data files are indexed by the unicharset file, +so changing it without re-generating the others is likely to have dire +consequences.

+
+
+
+

HISTORY

+
+

The unicharset format first appeared with Tesseract 2.00, which was the +first version to support languages other than English. The unicharset file +contained only the first two fields, and the "ispunctuation" property was +absent (punctuation was regarded as "0", as "=" is in the above example).

+
+
+
+

SEE ALSO

+
+

tesseract(1), combine_tessdata(1), unicharset_extractor(1)

+ +
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/unicharset_extractor.1.html b/lib/Tesseract-OCR/unicharset_extractor.1.html new file mode 100644 index 0000000000000000000000000000000000000000..9cd19245ce3ed1ced1101473e75697097c6a78fe --- /dev/null +++ b/lib/Tesseract-OCR/unicharset_extractor.1.html @@ -0,0 +1,804 @@ + + + + + +UNICHARSET_EXTRACTOR(1) + + + + + +
+
+

NAME

+
+

unicharset_extractor - Reads box or plain text files to extract the unicharset.

+
+
+
+

SYNOPSIS

+
+

unicharset_extractor [--output_unicharset filename] [--norm_mode mode] box_or_text_file […]

+

Where mode means: + 1=combine graphemes (use for Latin and other simple scripts) + 2=split graphemes (use for Indic/Khmer/Myanmar) + 3=pure unicode (use for Arabic/Hebrew/Thai/Tibetan)

+
+
+
+

DESCRIPTION

+
+

Tesseract needs to know the set of possible characters it can output. +To generate the unicharset data file, use the unicharset_extractor +program on training pages bounding box files or a plain text file:

+
+
+
unicharset_extractor fontfile_1.box fontfile_2.box ...
+
+

The unicharset will be put into the file ./unicharset if no output filename is provided.

+

NOTE Use the appropriate norm_mode based on the language.

+
+
+
+

SEE ALSO

+ +
+
+

HISTORY

+
+

unicharset_extractor first appeared in Tesseract 2.00.

+
+
+
+

COPYING

+
+

Copyright (C) 2006, Google Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/unicharset_extractor.exe b/lib/Tesseract-OCR/unicharset_extractor.exe new file mode 100644 index 0000000000000000000000000000000000000000..48a0d45cabed14ba96622c0cf09ca407955cd7cb --- /dev/null +++ b/lib/Tesseract-OCR/unicharset_extractor.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d483f4b7d60f6923d58b8289ecf7ec5c5cf0352f9fff69b277ab695c50248db1 +size 3771208 diff --git a/lib/Tesseract-OCR/winpath.exe b/lib/Tesseract-OCR/winpath.exe new file mode 100644 index 0000000000000000000000000000000000000000..428e40e914e1fd615ecd931333cb2d192c060499 Binary files /dev/null and b/lib/Tesseract-OCR/winpath.exe differ diff --git a/lib/Tesseract-OCR/wordlist2dawg.1.html b/lib/Tesseract-OCR/wordlist2dawg.1.html new file mode 100644 index 0000000000000000000000000000000000000000..a56322bc19d9ba605ab3ee76d6e96a3442748702 --- /dev/null +++ b/lib/Tesseract-OCR/wordlist2dawg.1.html @@ -0,0 +1,820 @@ + + + + + +WORDLIST2DAWG(1) + + + + + +
+
+

SYNOPSIS

+
+

wordlist2dawg WORDLIST DAWG lang.unicharset

+

wordlist2dawg -t WORDLIST DAWG lang.unicharset

+

wordlist2dawg -r 1 WORDLIST DAWG lang.unicharset

+

wordlist2dawg -r 2 WORDLIST DAWG lang.unicharset

+

wordlist2dawg -l <short> <long> WORDLIST DAWG lang.unicharset

+
+
+
+

DESCRIPTION

+
+

wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph +(DAWG) for use with Tesseract. A DAWG is a compressed, space and time +efficient representation of a word list.

+
+
+
+

OPTIONS

+
+

-t + Verify that a given dawg file is equivalent to a given wordlist.

+

-r 1 + Reverse a word if it contains an RTL character.

+

-r 2 + Reverse all words.

+

-l <short> <long> + Produce a file with several dawgs in it, one each for words + of length <short>, <short+1>,… <long>

+
+
+
+

ARGUMENTS

+
+

WORDLIST + A plain text file in UTF-8, one word per line.

+

DAWG + The output DAWG to write.

+

lang.unicharset + The unicharset of the language. This is the unicharset + generated by mftraining(1).

+
+
+
+

SEE ALSO

+
+

tesseract(1), combine_tessdata(1), dawg2wordlist(1)

+ +
+
+
+

COPYING

+
+

Copyright (C) 2006 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/lib/Tesseract-OCR/wordlist2dawg.exe b/lib/Tesseract-OCR/wordlist2dawg.exe new file mode 100644 index 0000000000000000000000000000000000000000..a9e44999b53ced58bd199b3573ded56415016e54 --- /dev/null +++ b/lib/Tesseract-OCR/wordlist2dawg.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b377e7a5074f6530e79ce7f33475b77cae8108a432313a41dce7fe233d433bc +size 1019632 diff --git a/lib/Tesseract-OCR/zlib1.dll b/lib/Tesseract-OCR/zlib1.dll new file mode 100644 index 0000000000000000000000000000000000000000..92b6761aba93ff29eaaf45bea9edd5af4d0de5d7 Binary files /dev/null and b/lib/Tesseract-OCR/zlib1.dll differ diff --git a/lib/poppler-0.68.0/bin/AUTHORS b/lib/poppler-0.68.0/bin/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..a785e403211f34fbcc5959fe6b1a273f5a246502 --- /dev/null +++ b/lib/poppler-0.68.0/bin/AUTHORS @@ -0,0 +1,5 @@ +xpdf is written by Derek Noonburg + +libpoppler is a fork of xpdf-3.00 + +Current Maintainer: Albert Astals Cid diff --git a/lib/poppler-0.68.0/bin/BINARIES b/lib/poppler-0.68.0/bin/BINARIES new file mode 100644 index 0000000000000000000000000000000000000000..14cacb6de0a9c336715ea1d5625884172e88e136 --- /dev/null +++ b/lib/poppler-0.68.0/bin/BINARIES @@ -0,0 +1,23 @@ +Version 0.33.0 + +Compiled by Alivate Australia 2015 + +Unless otherwise stated, the software on this site is distributed in the hope +that it will be useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THERE IS NO +WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT +WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS PROVIDE THE SOFTWARE +"AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF +THE SOFTWARE IS WITH YOU. 
SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE +COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. IN NO EVENT UNLESS +REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, +BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE +(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE +OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO +OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. + +http://blog.alivate.com.au/poppler-windows diff --git a/lib/poppler-0.68.0/bin/COPYING b/lib/poppler-0.68.0/bin/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..d511905c1647a1e311e8b20d5930a37a9c2531cd --- /dev/null +++ b/lib/poppler-0.68.0/bin/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/lib/poppler-0.68.0/bin/COPYING3 b/lib/poppler-0.68.0/bin/COPYING3 new file mode 100644 index 0000000000000000000000000000000000000000..94a9ed024d3859793618152ea559a168bbcbb5e2 --- /dev/null +++ b/lib/poppler-0.68.0/bin/COPYING3 @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. 
You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
diff --git a/lib/poppler-0.68.0/bin/README b/lib/poppler-0.68.0/bin/README new file mode 100644 index 0000000000000000000000000000000000000000..dc3c19ec31baa66e84a1c95229b83d918b48f2b7 --- /dev/null +++ b/lib/poppler-0.68.0/bin/README @@ -0,0 +1,39 @@ +This is poppler, a PDF rendering library. + +Poppler is a fork of the xpdf PDF viewer developed by Derek Noonburg +of Glyph and Cog, LLC. The purpose of forking xpdf is twofold. +First, we want to provide PDF rendering functionality as a shared +library, to centralize the maintenance effort. Today a number of +applications incorporate the xpdf code base, and whenever a security +issue is discovered, all these applications exchange patches and put +out new releases. In turn, all distributions must package and release +new versions of these xpdf based viewers. It's safe to say that +there's a lot of duplicated effort with the current situation. Even if +poppler in the short term introduces yet another xpdf derived code +base to the world, we hope that over time these applications will +adopt poppler. After all, we only need one application to use poppler +to break even. + +Second, we would like to move libpoppler forward in a number of areas +that don't fit within the goals of xpdf. By design, xpdf depends on +very few libraries and runs on a wide range of X based platforms. This +is a strong feature and reasonable design goal. However, with poppler +we would like to replace parts of xpdf that are now available as +standard components of modern Unix desktop environments. One such +example is fontconfig, which solves the problem of matching and +locating fonts on the system, in a standardized and well understood +way. Another example is cairo, which provides high quality 2D +rendering. See the file TODO for a list of planned changes. + +Please note that xpdf, and thus poppler, is licensed under the GPL, +not the LGPL. Consequently, any application using poppler must also +be licensed under the GPL. 
If you want to incorporate Xpdf based PDF +rendering in a closed source product, please contact Glyph & Cog +(www.glyphandcog.com) for commercial licensing options. Note that +this only allows you to use xpdf in a closed source product, +not poppler itself. + + Kristian Høgsberg, Feb. 27, 2005 + + +See the README-XPDF for the original xpdf-3.03 README. diff --git a/lib/poppler-0.68.0/bin/README-XPDF b/lib/poppler-0.68.0/bin/README-XPDF new file mode 100644 index 0000000000000000000000000000000000000000..e97791ce5b20467d74a83715cb2f0879f9b53c40 --- /dev/null +++ b/lib/poppler-0.68.0/bin/README-XPDF @@ -0,0 +1,423 @@ +Xpdf +==== + +version 3.03 +2011-aug-15 + +The Xpdf software and documentation are +copyright 1996-2011 Glyph & Cog, LLC. + +Email: derekn@foolabs.com +WWW: http://www.foolabs.com/xpdf/ + +The PDF data structures, operators, and specification are +copyright 1985-2006 Adobe Systems Inc. + + +What is Xpdf? +------------- + +Xpdf is an open source viewer for Portable Document Format (PDF) +files. (These are sometimes also called 'Acrobat' files, from +the name of Adobe's PDF software.) The Xpdf project also includes a +PDF text extractor, PDF-to-PostScript converter, and various other +utilities. + +Xpdf runs under the X Window System on UNIX, VMS, and OS/2. The non-X +components (pdftops, pdftotext, etc.) also run on Windows and Mac OSX +systems and should run on pretty much any system with a decent C++ +compiler. Xpdf will run on 32-bit and 64-bit machines. + + +License & Distribution +---------------------- + +Xpdf is licensed under the GNU General Public License (GPL), version 2 +or 3. This means that you can distribute derivatives of Xpdf under +any of the following: + - GPL v2 only + - GPL v3 only + - GPL v2 or v3 + +The Xpdf source package includes the text of both GPL versions: +COPYING for GPL v2, COPYING3 for GPL v3. 
+ +Please note that Xpdf is NOT licensed under "any later version" of the +GPL, as I have no idea what those versions will look like. + +If you are redistributing unmodified copies of Xpdf (or any of the +Xpdf tools) in binary form, you need to include all of the +documentation: README, man pages (or help files), COPYING, and +COPYING3. + +If you want to incorporate the Xpdf source code into another program +(or create a modified version of Xpdf), and you are distributing that +program, you have two options: release your program under the GPL (v2 +and/or v3), or purchase a commercial Xpdf source license. + +If you're interested in commercial licensing, please see the Glyph & +Cog web site: + + http://www.glyphandcog.com/ + + +Compatibility +------------- + +Xpdf is developed and tested on Linux. + +In addition, it has been compiled by others on Solaris, AIX, HP-UX, +Digital Unix, Irix, and numerous other Unix implementations, as well +as VMS and OS/2. It should work on pretty much any system which runs +X11 and has Unix-like libraries. You'll need ANSI C++ and C compilers +to compile it. + +The non-X components of Xpdf (pdftops, pdftotext, pdfinfo, pdffonts, +pdfdetach, pdftoppm, and pdfimages) can also be compiled on Windows +and Mac OSX systems. See the Xpdf web page for details. + +If you compile Xpdf for a system not listed on the web page, please +let me know. If you're willing to make your binary available by ftp +or on the web, I'll be happy to add a link from the Xpdf web page. I +have decided not to host any binaries I didn't compile myself (for +disk space and support reasons). + +If you can't get Xpdf to compile on your system, send me email and +I'll try to help. + +Xpdf has been ported to the Acorn, Amiga, BeOS, and EPOC. See the +Xpdf web page for links. 
+ + +Getting Xpdf +------------ + +The latest version is available from: + + http://www.foolabs.com/xpdf/ + +or: + + ftp://ftp.foolabs.com/pub/xpdf/ + +Source code and several precompiled executables are available. + +Announcements of new versions are posted to comp.text.pdf and emailed +to a list of people. If you'd like to receive email notification of +new versions, just let me know. + + +Running Xpdf +------------ + +To run xpdf, simply type: + + xpdf file.pdf + +To generate a PostScript file, hit the "print" button in xpdf, or run +pdftops: + + pdftops file.pdf + +To generate a plain text file, run pdftotext: + + pdftotext file.pdf + +There are five additional utilities (which are fully described in +their man pages): + + pdfinfo -- dumps a PDF file's Info dictionary (plus some other + useful information) + pdffonts -- lists the fonts used in a PDF file along with various + information for each font + pdfdetach -- lists or extracts embedded files (attachments) from a + PDF file + pdftoppm -- converts a PDF file to a series of PPM/PGM/PBM-format + bitmaps + pdfimages -- extracts the images from a PDF file + +Command line options and many other details are described in the man +pages (xpdf(1), etc.) and the VMS help files (xpdf.hlp, etc.). + +All of these utilities read an optional configuration file: see the +xpdfrc(5) man page. + + +Upgrading from Xpdf 3.02 (and earlier) +-------------------------------------- + +The font configuration system has been changed. Previous versions +used mostly separate commands to configure fonts for display and for +PostScript output. As of 3.03, configuration options that make sense +for both display and PS output have been unified. 
+ +The following xpdfrc commands have been removed: +* displayFontT1, displayFontTT: replaced with fontFile +* displayNamedCIDFontT1, displayNamedCIDFontTT: replaced with fontFile +* displayCIDFontT1, displayCIDFontTT: replaced with fontFileCC +* psFont: replaced with psResidentFont +* psNamedFont16: replaced with psResidentFont16 +* psFont16: replaced with psResidentFontCC + +See the xpdfrc(5) man page for more information on the new commands. + +Pdftops will now embed external 16-bit fonts (configured with the +fontFileCC command) when the PDF file refers to a non-embedded font. +It does not do any subsetting (yet), so the resulting PS files will be +large. + + +Compiling Xpdf +-------------- + +See the separate file, INSTALL. + + +Bugs +---- + +If you find a bug in Xpdf, i.e., if it prints an error message, +crashes, or incorrectly displays a document, and you don't see that +bug listed here, please send me email, with a pointer (URL, ftp site, +etc.) to the PDF file. + + +Acknowledgments +--------------- + +Thanks to: + +* Patrick Voigt for help with the remote server code. +* Patrick Moreau, Martin P.J. Zinser, and David Mathog for the VMS + port. +* David Boldt and Rick Rodgers for sample man pages. +* Brendan Miller for the icon idea. +* Olly Betts for help testing pdftotext. +* Peter Ganten for the OS/2 port. +* Michael Richmond for the Win32 port of pdftops and pdftotext and the + xpdf/cygwin/XFree86 build instructions. +* Frank M. Siegert for improvements in the PostScript code. +* Leo Smiers for the decryption patches. +* Rainer Menzner for creating t1lib, and for helping me adapt it to + xpdf. +* Pine Tree Systems A/S for funding the OPI and EPS support in + pdftops. +* Easy Software Products for funding several improvements to the + PostScript output code. +* Tom Kacvinsky for help with FreeType and for being my interface to + the FreeType team. +* Theppitak Karoonboonyanan for help with Thai support. 
+* Leonard Rosenthol for help and contributions on a bunch of things. +* Alexandros Diamantidis and Maria Adaloglou for help with Greek + support. +* Lawrence Lai for help with the CJK Unicode maps. + +Various people have contributed modifications made for use by the +pdftex project: + +* Han The Thanh +* Martin Schröder of ArtCom GmbH + + +References +---------- + +Adobe Systems Inc., _PDF Reference, sixth edition: Adobe Portable +Document Format version 1.7_. +http://www.adobe.com/devnet/pdf/pdf_reference.html +[The manual for PDF version 1.7.] + +Adobe Systems Inc., "Errata for the PDF Reference, sixth edition, +version 1.7", October 16, 2006. +http://www.adobe.com/devnet/pdf/pdf_reference.html +[The errata for the PDF 1.7 spec.] + +Adobe Systems Inc., _PostScript Language Reference_, 3rd ed. +Addison-Wesley, 1999, ISBN 0-201-37922-8. +[The official PostScript manual.] + +Adobe Systems, Inc., _The Type 42 Font Format Specification_, +Adobe Developer Support Technical Specification #5012. 1998. +http://partners.adobe.com/asn/developer/pdfs/tn/5012.Type42_Spec.pdf +[Type 42 is the format used to embed TrueType fonts in PostScript +files.] + +Adobe Systems, Inc., _Adobe CMap and CIDFont Files Specification_, +Adobe Developer Support Technical Specification #5014. 1995. +http://www.adobe.com/supportservice/devrelations/PDFS/TN/5014.CIDFont_Spec.pdf +[CMap file format needed for Japanese and Chinese font support.] + +Adobe Systems, Inc., _Adobe-Japan1-4 Character Collection for +CID-Keyed Fonts_, Adobe Developer Support Technical Note #5078. +2000. +http://partners.adobe.com/asn/developer/PDFS/TN/5078.CID_Glyph.pdf +[The Adobe Japanese character set.] + +Adobe Systems, Inc., _Adobe-GB1-4 Character Collection for +CID-Keyed Fonts_, Adobe Developer Support Technical Note #5079. +2000. +http://partners.adobe.com/asn/developer/pdfs/tn/5079.Adobe-GB1-4.pdf +[The Adobe Chinese GB (simplified) character set.] 
+ +Adobe Systems, Inc., _Adobe-CNS1-3 Character Collection for +CID-Keyed Fonts_, Adobe Developer Support Technical Note #5080. +2000. +http://partners.adobe.com/asn/developer/PDFS/TN/5080.CNS_CharColl.pdf +[The Adobe Chinese CNS (traditional) character set.] + +Adobe Systems Inc., _Supporting the DCT Filters in PostScript Level +2_, Adobe Developer Support Technical Note #5116. 1992. +http://www.adobe.com/supportservice/devrelations/PDFS/TN/5116.PS2_DCT.PDF +[Description of the DCTDecode filter parameters.] + +Adobe Systems Inc., _Open Prepress Interface (OPI) Specification - +Version 2.0_, Adobe Developer Support Technical Note #5660. 2000. +http://partners.adobe.com/asn/developer/PDFS/TN/5660.OPI_2.0.pdf + +Adobe Systems Inc., CMap files. +ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/ +[The actual CMap files for the 16-bit CJK encodings.] + +Adobe Systems Inc., Unicode glyph lists. +http://partners.adobe.com/asn/developer/type/unicodegn.html +http://partners.adobe.com/asn/developer/type/glyphlist.txt +http://partners.adobe.com/asn/developer/type/corporateuse.txt +http://partners.adobe.com/asn/developer/type/zapfdingbats.txt +[Mappings between character names to Unicode.] + +Adobe Systems Inc., OpenType Specification v. 1.4. +http://partners.adobe.com/public/developer/opentype/index_spec.html +[The OpenType font format spec.] + +Aldus Corp., _OPI: Open Prepress Interface Specification 1.3_. 1993. +http://partners.adobe.com/asn/developer/PDFS/TN/OPI_13.pdf + +Anonymous, RC4 source code. +ftp://ftp.ox.ac.uk/pub/crypto/misc/rc4.tar.gz +ftp://idea.sec.dsi.unimi.it/pub/crypt/code/rc4.tar.gz +[This is the algorithm used to encrypt PDF files.] + +T. Boutell, et al., "PNG (Portable Network Graphics) Specification, +Version 1.0". RFC 2083. +[PDF uses the PNG filter algorithms.] + +CCITT, "Information Technology - Digital Compression and Coding of +Continuous-tone Still Images - Requirements and Guidelines", CCITT +Recommendation T.81. 
+http://www.w3.org/Graphics/JPEG/ +[The official JPEG spec.] + +A. Chernov, "Registration of a Cyrillic Character Set". RFC 1489. +[Documentation for the KOI8-R Cyrillic encoding.] + +Roman Czyborra, "The ISO 8859 Alphabet Soup". +http://czyborra.com/charsets/iso8859.html +[Documentation on the various ISO 8859 encodings.] + +L. Peter Deutsch, "ZLIB Compressed Data Format Specification version +3.3". RFC 1950. +[Information on the general format used in FlateDecode streams.] + +L. Peter Deutsch, "DEFLATE Compressed Data Format Specification +version 1.3". RFC 1951. +[The definition of the compression algorithm used in FlateDecode +streams.] + +Morris Dworkin, "Recommendation for Block Cipher Modes of Operation", +National Institute of Standards, NIST Special Publication 800-38A, +2001. +[The cipher block chaining (CBC) mode used with AES in PDF files.] + +Federal Information Processing Standards Publication 197 (FIPS PUBS +197), "Advanced Encryption Standard (AES)", November 26, 2001. +[AES encryption, used in PDF 1.6.] + +Jim Flowers, "X Logical Font Description Conventions", Version 1.5, X +Consortium Standard, X Version 11, Release 6.1. +ftp://ftp.x.org/pub/R6.1/xc/doc/hardcopy/XLFD/xlfd.PS.Z +[The official specification of X font descriptors, including font +transformation matrices.] + +Foley, van Dam, Feiner, and Hughes, _Computer Graphics: Principles and +Practice_, 2nd ed. Addison-Wesley, 1990, ISBN 0-201-12110-7. +[Colorspace conversion functions, Bezier spline math.] + +Robert L. Hummel, _Programmer's Technical Reference: Data and Fax +Communications_. Ziff-Davis Press, 1993, ISBN 1-56276-077-7. +[CCITT Group 3 and 4 fax decoding.] + +ISO/IEC, _Information technology -- Lossy/lossless coding of bi-level +images_. ISO/IEC 14492, First edition (2001-12-15). +http://webstore.ansi.org/ +[The official JBIG2 standard. The final draft of this spec is +available from http://www.jpeg.org/jbighomepage.html.] 
+ +ISO/IEC, _Information technology -- JPEG 2000 image coding system -- +Part 1: Core coding system_. ISO/IEC 15444-1, First edition +(2000-12-15). +http://webstore.ansi.org/ +[The official JPEG 2000 standard. The final committee draft of this +spec is available from http://www.jpeg.org/JPEG2000.html, but there +were changes made to the bitstream format between that draft and the +published spec.] + +ITU, "Standardization of Group 3 facsimile terminals for document +transmission", ITU-T Recommendation T.4, 1999. +ITU, "Facsimile coding schemes and coding control functions for Group 4 +facsimile apparatus", ITU-T Recommendation T.6, 1993. +http://www.itu.int/ +[The official Group 3 and 4 fax standards - used by the CCITTFaxDecode +stream, as well as the JBIG2Decode stream.] + +B. Kaliski, "PKCS #5: Password-Based Cryptography Specification, +Version 2.0". RFC 2898. +[Defines the padding scheme used with AES encryption in PDF files.] + +Christoph Loeffler, Adriaan Ligtenberg, George S. Moschytz, "Practical +Fast 1-D DCT Algorithms with 11 Multiplications". IEEE Intl. Conf. on +Acoustics, Speech & Signal Processing, 1989, 988-991. +[The fast IDCT algorithm used in the DCTDecode filter.] + +Microsoft, _TrueType 1.0 Font Files_, rev. 1.66. 1995. +http://www.microsoft.com/typography/tt/tt.htm +[The TrueType font spec (in MS Word format, naturally).] + +V. Ostromoukhov, R.D. Hersch, "Stochastic Clustered-Dot Dithering", +Conf. Color Imaging: Device-Independent Color, Color Hardcopy, and +Graphic Arts IV, 1999, SPIE Vol. 3648, 496-505. +http://diwww.epfl.ch/w3lsp/publications/colour/scd.html +[The stochastic dithering algorithm used in Xpdf.] + +P. Peterlin, "ISO 8859-2 (Latin 2) Resources". +http://sizif.mf.uni-lj.si/linux/cee/iso8859-2.html +[This is a web page with all sorts of useful Latin-2 character set and +font information.] + +Charles Poynton, "Color FAQ". +http://www.inforamp.net/~poynton/ColorFAQ.html +[The mapping from the CIE 1931 (XYZ) color space to RGB.] 
+ +R. Rivest, "The MD5 Message-Digest Algorithm". RFC 1321. +[MD5 is used in PDF document encryption.] + +Thai Industrial Standard, "Standard for Thai Character Codes for +Computers", TIS-620-2533 (1990). +http://www.nectec.or.th/it-standards/std620/std620.htm +[The TIS-620 Thai encoding.] + +Unicode Consortium, "Unicode Home Page". +http://www.unicode.org/ +[Online copy of the Unicode spec.] + +W3C Recommendation, "PNG (Portable Network Graphics) Specification +Version 1.0". +http://www.w3.org/Graphics/PNG/ +[Defines the PNG image predictor.] + +Gregory K. Wallace, "The JPEG Still Picture Compression Standard". +ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz +[Good description of the JPEG standard. Also published in CACM, April +1991, and submitted to IEEE Transactions on Consumer Electronics.] + +F. Yergeau, "UTF-8, a transformation format of ISO 10646". RFC 2279. +[A commonly used Unicode encoding.] diff --git a/lib/poppler-0.68.0/bin/freetype6.dll b/lib/poppler-0.68.0/bin/freetype6.dll new file mode 100644 index 0000000000000000000000000000000000000000..93f68f11aba68e662e5b4acd2ad742a38db78d5a Binary files /dev/null and b/lib/poppler-0.68.0/bin/freetype6.dll differ diff --git a/lib/poppler-0.68.0/bin/jpeg62.dll b/lib/poppler-0.68.0/bin/jpeg62.dll new file mode 100644 index 0000000000000000000000000000000000000000..21c03d551647c2292734f7129106564b1074a17f Binary files /dev/null and b/lib/poppler-0.68.0/bin/jpeg62.dll differ diff --git a/lib/poppler-0.68.0/bin/libcairo-2.dll b/lib/poppler-0.68.0/bin/libcairo-2.dll new file mode 100644 index 0000000000000000000000000000000000000000..37bfaa27da5d3fba729d35b0237c6798155da8b1 --- /dev/null +++ b/lib/poppler-0.68.0/bin/libcairo-2.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4609a0c454188c70e1d668078dac73b2334ecba6a630a0ca910272ea7411136c +size 1138880 diff --git a/lib/poppler-0.68.0/bin/libexpat-1.dll b/lib/poppler-0.68.0/bin/libexpat-1.dll new file mode 100644 index 
0000000000000000000000000000000000000000..26f43bba59db345821e8dbe1aed937b7d1e1083f Binary files /dev/null and b/lib/poppler-0.68.0/bin/libexpat-1.dll differ diff --git a/lib/poppler-0.68.0/bin/libfontconfig-1.dll b/lib/poppler-0.68.0/bin/libfontconfig-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..7ad870bb659c03bb3a872a50a3e33fa6ed9a78f4 Binary files /dev/null and b/lib/poppler-0.68.0/bin/libfontconfig-1.dll differ diff --git a/lib/poppler-0.68.0/bin/libgcc_s_dw2-1.dll b/lib/poppler-0.68.0/bin/libgcc_s_dw2-1.dll new file mode 100644 index 0000000000000000000000000000000000000000..a98ce4677032ec735ac9c9a8ddb82b83c6f808bf Binary files /dev/null and b/lib/poppler-0.68.0/bin/libgcc_s_dw2-1.dll differ diff --git a/lib/poppler-0.68.0/bin/libpixman-1-0.dll b/lib/poppler-0.68.0/bin/libpixman-1-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..c4c1ec9fb720054998f09d10f4e92bc2572d61db Binary files /dev/null and b/lib/poppler-0.68.0/bin/libpixman-1-0.dll differ diff --git a/lib/poppler-0.68.0/bin/libpng16-16.dll b/lib/poppler-0.68.0/bin/libpng16-16.dll new file mode 100644 index 0000000000000000000000000000000000000000..a2187c411fa03f6b236241650dc65f81e9f706c7 Binary files /dev/null and b/lib/poppler-0.68.0/bin/libpng16-16.dll differ diff --git a/lib/poppler-0.68.0/bin/libpoppler-79.dll b/lib/poppler-0.68.0/bin/libpoppler-79.dll new file mode 100644 index 0000000000000000000000000000000000000000..0e8a8f647e3ab58c53c8fd7142accba584d091d0 --- /dev/null +++ b/lib/poppler-0.68.0/bin/libpoppler-79.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb16325f172c2b545b8242b7471b6307f8bc95e1cd13cce136aa3ef3b0bffa38 +size 19556669 diff --git a/lib/poppler-0.68.0/bin/libpoppler-cpp-0.dll b/lib/poppler-0.68.0/bin/libpoppler-cpp-0.dll new file mode 100644 index 0000000000000000000000000000000000000000..a1d02f7c76bcbfe09c2f4727fe2d22b9520a7f09 --- /dev/null +++ b/lib/poppler-0.68.0/bin/libpoppler-cpp-0.dll @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf29e8447d8d6205bffe6af24b47a16d8298c58f3de2703fa071d72d61cfc680 +size 2297624 diff --git a/lib/poppler-0.68.0/bin/libstdc++-6.dll b/lib/poppler-0.68.0/bin/libstdc++-6.dll new file mode 100644 index 0000000000000000000000000000000000000000..a773a99e8665f843e9dad2c181974b870e62bf2b --- /dev/null +++ b/lib/poppler-0.68.0/bin/libstdc++-6.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7171607dd392289c09b7ffeb4df1389c4f39bb895daef35e3f0d768589c2308f +size 1508122 diff --git a/lib/poppler-0.68.0/bin/libtiff3.dll b/lib/poppler-0.68.0/bin/libtiff3.dll new file mode 100644 index 0000000000000000000000000000000000000000..9883c51a2f6a90eb063b998ddb21b8fb645f36ee Binary files /dev/null and b/lib/poppler-0.68.0/bin/libtiff3.dll differ diff --git a/lib/poppler-0.68.0/bin/pdfdetach.exe b/lib/poppler-0.68.0/bin/pdfdetach.exe new file mode 100644 index 0000000000000000000000000000000000000000..34f51bb943d4c3c7c4a32d3c480aca5b1e65605a Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdfdetach.exe differ diff --git a/lib/poppler-0.68.0/bin/pdffonts.exe b/lib/poppler-0.68.0/bin/pdffonts.exe new file mode 100644 index 0000000000000000000000000000000000000000..150d6c1fada911dccf8122e189518e9dff5af32e Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdffonts.exe differ diff --git a/lib/poppler-0.68.0/bin/pdfimages.exe b/lib/poppler-0.68.0/bin/pdfimages.exe new file mode 100644 index 0000000000000000000000000000000000000000..5286279ce5a3e9c2f1dd5b6ce996654ee22815ce Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdfimages.exe differ diff --git a/lib/poppler-0.68.0/bin/pdfinfo.exe b/lib/poppler-0.68.0/bin/pdfinfo.exe new file mode 100644 index 0000000000000000000000000000000000000000..a9c5130c9b9e8a4a5645aa59ee83ad11dd07fced --- /dev/null +++ b/lib/poppler-0.68.0/bin/pdfinfo.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e91e10e2c98d9c66c611180050329272ce0e3e443337c43e1c11fe9e18bdaebf +size 1146115 diff --git a/lib/poppler-0.68.0/bin/pdfseparate.exe b/lib/poppler-0.68.0/bin/pdfseparate.exe new file mode 100644 index 0000000000000000000000000000000000000000..93b6a88f26cfc5425eaab9be3c8cc1787807290a Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdfseparate.exe differ diff --git a/lib/poppler-0.68.0/bin/pdftocairo.exe b/lib/poppler-0.68.0/bin/pdftocairo.exe new file mode 100644 index 0000000000000000000000000000000000000000..23109569450bde53ae2ada2f12f176c7f9247ed8 --- /dev/null +++ b/lib/poppler-0.68.0/bin/pdftocairo.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59a6ed8f7609c60419d73692325bc5944e5b665efb54472dcb74fcb6be600af +size 1795647 diff --git a/lib/poppler-0.68.0/bin/pdftohtml.exe b/lib/poppler-0.68.0/bin/pdftohtml.exe new file mode 100644 index 0000000000000000000000000000000000000000..a8fb58b52b05aa406dd359557ee3f13620f83c60 --- /dev/null +++ b/lib/poppler-0.68.0/bin/pdftohtml.exe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82d922c250871abe89c9befaee504d713351f807203992f91c81d8510121bb4 +size 1555947 diff --git a/lib/poppler-0.68.0/bin/pdftoppm.exe b/lib/poppler-0.68.0/bin/pdftoppm.exe new file mode 100644 index 0000000000000000000000000000000000000000..8642e314896aa448f17516dd0548ce0792a50d4f Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdftoppm.exe differ diff --git a/lib/poppler-0.68.0/bin/pdftops.exe b/lib/poppler-0.68.0/bin/pdftops.exe new file mode 100644 index 0000000000000000000000000000000000000000..0459c4fb59cd4c628c70fd890abfe0ddcb2f20c5 Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdftops.exe differ diff --git a/lib/poppler-0.68.0/bin/pdftotext.exe b/lib/poppler-0.68.0/bin/pdftotext.exe new file mode 100644 index 0000000000000000000000000000000000000000..459efb1523206f92bc8eecf0e51b145ad9de162d Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdftotext.exe differ diff --git 
a/lib/poppler-0.68.0/bin/pdfunite.exe b/lib/poppler-0.68.0/bin/pdfunite.exe new file mode 100644 index 0000000000000000000000000000000000000000..577fedccae0778237f8e114765650b0cdc7c43c8 Binary files /dev/null and b/lib/poppler-0.68.0/bin/pdfunite.exe differ diff --git a/lib/poppler-0.68.0/bin/zlib1.dll b/lib/poppler-0.68.0/bin/zlib1.dll new file mode 100644 index 0000000000000000000000000000000000000000..f2452c90604277670ed3878c02e9dbb3dcc6ae3a Binary files /dev/null and b/lib/poppler-0.68.0/bin/zlib1.dll differ diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-document.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-document.h new file mode 100644 index 0000000000000000000000000000000000000000..c2322634701fdf6c1819d2ce10142e8dc8503c1e --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-document.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2009-2010, Pino Toscano + * Copyright (C) 2016 Jakub Alba + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef POPPLER_DOCUMENT_H +#define POPPLER_DOCUMENT_H + +#include "poppler-global.h" +#include "poppler-font.h" + +namespace poppler +{ + +class document_private; +class embedded_file; +class page; +class toc; + +class POPPLER_CPP_EXPORT document : public poppler::noncopyable +{ +public: + enum page_mode_enum { + use_none, + use_outlines, + use_thumbs, + fullscreen, + use_oc, + use_attach + }; + + enum page_layout_enum { + no_layout, + single_page, + one_column, + two_column_left, + two_column_right, + two_page_left, + two_page_right + }; + + ~document(); + + bool is_locked() const; + bool unlock(const std::string &owner_password, const std::string &user_password); + + page_mode_enum page_mode() const; + page_layout_enum page_layout() const; + void get_pdf_version(int *major, int *minor) const; + std::vector info_keys() const; + + ustring info_key(const std::string &key) const; + bool set_info_key(const std::string &key, const ustring &val); + + time_type info_date(const std::string &key) const; + bool set_info_date(const std::string &key, time_type val); + + ustring get_title() const; + bool set_title(const ustring &title); + ustring get_author() const; + bool set_author(const ustring &author); + ustring get_subject() const; + bool set_subject(const ustring &subject); + ustring get_keywords() const; + bool set_keywords(const ustring &keywords); + ustring get_creator() const; + bool set_creator(const ustring &creator); + ustring get_producer() const; + bool set_producer(const ustring &producer); + time_type get_creation_date() const; + bool set_creation_date(time_type creation_date); + time_type get_modification_date() const; + bool set_modification_date(time_type mod_date); + + bool remove_info(); + + bool is_encrypted() const; + bool is_linearized() const; + bool has_permission(permission_enum which) const; + ustring metadata() const; + bool get_pdf_id(std::string *permanent_id, std::string *update_id) const; + + int pages() const; + page* 
create_page(const ustring &label) const; + page* create_page(int index) const; + + std::vector fonts() const; + font_iterator* create_font_iterator(int start_page = 0) const; + + toc* create_toc() const; + + bool has_embedded_files() const; + std::vector embedded_files() const; + + bool save(const std::string &filename) const; + bool save_a_copy(const std::string &filename) const; + + static document* load_from_file(const std::string &file_name, + const std::string &owner_password = std::string(), + const std::string &user_password = std::string()); + static document* load_from_data(byte_array *file_data, + const std::string &owner_password = std::string(), + const std::string &user_password = std::string()); + static document* load_from_raw_data(const char *file_data, + int file_data_length, + const std::string &owner_password = std::string(), + const std::string &user_password = std::string()); + +private: + document(document_private &dd); + + document_private *d; + friend class document_private; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-embedded-file.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-embedded-file.h new file mode 100644 index 0000000000000000000000000000000000000000..307fdd6472f5fc34bc4adffd03aa430a5f658068 --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-embedded-file.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2009-2010, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_EMBEDDED_FILE_H +#define POPPLER_EMBEDDED_FILE_H + +#include "poppler-global.h" + +#include + +namespace poppler +{ + +class embedded_file_private; + +class POPPLER_CPP_EXPORT embedded_file : public poppler::noncopyable +{ +public: + ~embedded_file(); + + bool is_valid() const; + std::string name() const; + ustring description() const; + int size() const; + time_type modification_date() const; + time_type creation_date() const; + byte_array checksum() const; + std::string mime_type() const; + byte_array data() const; + +private: + embedded_file(embedded_file_private &dd); + + embedded_file_private *d; + friend class embedded_file_private; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-font.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-font.h new file mode 100644 index 0000000000000000000000000000000000000000..854b7a40606892d31b4763d12a756ac606d6047b --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-font.h @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2009, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef POPPLER_FONT_H +#define POPPLER_FONT_H + +#include "poppler-global.h" + +#include + +namespace poppler +{ + +class document; +class document_private; +class font_info_private; +class font_iterator; +class font_iterator_private; + +class POPPLER_CPP_EXPORT font_info +{ +public: + enum type_enum { + unknown, + type1, + type1c, + type1c_ot, + type3, + truetype, + truetype_ot, + cid_type0, + cid_type0c, + cid_type0c_ot, + cid_truetype, + cid_truetype_ot + }; + + font_info(); + font_info(const font_info &fi); + ~font_info(); + + std::string name() const; + std::string file() const; + bool is_embedded() const; + bool is_subset() const; + type_enum type() const; + + font_info& operator=(const font_info &fi); + +private: + font_info(font_info_private &dd); + + font_info_private *d; + friend class font_iterator; +}; + + +class POPPLER_CPP_EXPORT font_iterator : public poppler::noncopyable +{ +public: + ~font_iterator(); + + std::vector next(); + bool has_next() const; + int current_page() const; + +private: + font_iterator(int, document_private *dd); + + font_iterator_private *d; + friend class document; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-global.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-global.h new file mode 100644 index 0000000000000000000000000000000000000000..eb7ec244abad70bdb661718f18668ffca93ba113 --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-global.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2009-2010, Pino Toscano + * Copyright (C) 2010, Patrick Spendrin + * Copyright (C) 2014, Hans-Peter Deifel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_GLOBAL_H +#define POPPLER_GLOBAL_H + +#if defined(_WIN32) +# define LIB_EXPORT __declspec(dllexport) +# define LIB_IMPORT __declspec(dllimport) +#else +# define LIB_EXPORT +# define LIB_IMPORT +#endif + +#if defined(poppler_cpp_EXPORTS) +# define POPPLER_CPP_EXPORT LIB_EXPORT +#else +# define POPPLER_CPP_EXPORT LIB_IMPORT +#endif + +#include +#include +#include + +namespace poppler +{ + +/// \cond DOXYGEN_SKIP_THIS +namespace detail +{ + +class POPPLER_CPP_EXPORT noncopyable +{ +protected: + noncopyable(); + ~noncopyable(); +private: + noncopyable(const noncopyable &); + const noncopyable& operator=(const noncopyable &); +}; + +} + +typedef detail::noncopyable noncopyable; +/// \endcond + +enum rotation_enum { rotate_0, rotate_90, rotate_180, rotate_270 }; + +enum page_box_enum { media_box, crop_box, bleed_box, trim_box, art_box }; + +enum permission_enum { perm_print, perm_change, perm_copy, perm_add_notes, + perm_fill_forms, perm_accessibility, perm_assemble, + perm_print_high_resolution }; + +enum case_sensitivity_enum { case_sensitive, case_insensitive }; + +typedef std::vector byte_array; + +typedef unsigned int /* time_t */ time_type; + +// to disable warning only for this occurrence +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4251) /* class 'A' needs to have dll interface for to be used by clients of class 'B'. 
*/ +#endif +class POPPLER_CPP_EXPORT ustring : public std::basic_string +{ +public: + ustring(); + ustring(size_type len, value_type ch); + ~ustring(); + + byte_array to_utf8() const; + std::string to_latin1() const; + + static ustring from_utf8(const char *str, int len = -1); + static ustring from_latin1(const std::string &str); + +private: + // forbid implicit std::string conversions + ustring(const std::string &); + operator std::string() const; + ustring& operator=(const std::string &); +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +POPPLER_CPP_EXPORT time_type convert_date(const std::string &date); + +POPPLER_CPP_EXPORT std::ostream& operator<<(std::ostream& stream, const byte_array &array); + +typedef void(*debug_func)(const std::string &, void *); + +POPPLER_CPP_EXPORT void set_debug_error_function(debug_func debug_function, void *closure); + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-image.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-image.h new file mode 100644 index 0000000000000000000000000000000000000000..030ed1d12a1694895807b90a0b01f19aad022fbc --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-image.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2010, Pino Toscano + * Copyright (C) 2018, Zsombor Hollay-Horvath + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_IMAGE_H +#define POPPLER_IMAGE_H + +#include "poppler-global.h" +#include "poppler-rectangle.h" + +namespace poppler +{ + +class image_private; + +class POPPLER_CPP_EXPORT image +{ +public: + enum format_enum { + format_invalid, + format_mono, + format_rgb24, + format_argb32, + format_gray8, + format_bgr24 + }; + + image(); + image(int iwidth, int iheight, format_enum iformat); + image(char *idata, int iwidth, int iheight, format_enum iformat); + image(const image &img); + ~image(); + + bool is_valid() const; + format_enum format() const; + int width() const; + int height() const; + char *data(); + const char *const_data() const; + int bytes_per_row() const; + + image copy(const rect &r = rect()) const; + + bool save(const std::string &file_name, const std::string &out_format, int dpi = -1) const; + + static std::vector supported_image_formats(); + + image& operator=(const image &img); + +private: + void detach(); + + image_private *d; + friend class image_private; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-renderer.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-renderer.h new file mode 100644 index 0000000000000000000000000000000000000000..368281a8b2183eeb74d4e9773421a6e79abd4e69 --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-renderer.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2010, Pino Toscano + * Copyright (C) 2018, Zsombor Hollay-Horvath + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_PAGE_RENDERER_H +#define POPPLER_PAGE_RENDERER_H + +#include "poppler-global.h" +#include "poppler-image.h" + +namespace poppler +{ + +typedef unsigned int argb; + +class page; +class page_renderer_private; + +class POPPLER_CPP_EXPORT page_renderer : public poppler::noncopyable +{ +public: + enum render_hint { + antialiasing = 0x00000001, + text_antialiasing = 0x00000002, + text_hinting = 0x00000004 + }; + + enum line_mode_enum { + line_default, + line_solid, + line_shape + }; + + page_renderer(); + ~page_renderer(); + + argb paper_color() const; + void set_paper_color(argb c); + + unsigned int render_hints() const; + void set_render_hint(render_hint hint, bool on = true); + void set_render_hints(unsigned int hints); + + image::format_enum image_format() const; + void set_image_format(image::format_enum format); + + line_mode_enum line_mode() const; + void set_line_mode(line_mode_enum mode); + + image render_page(const page *p, + double xres = 72.0, double yres = 72.0, + int x = -1, int y = -1, int w = -1, int h = -1, + rotation_enum rotate = rotate_0) const; + + static bool can_render(); + +private: + page_renderer_private *d; + friend class page_renderer_private; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-transition.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-transition.h new file mode 100644 index 0000000000000000000000000000000000000000..4ecb179a2a01a3be3ac7aa43748c69d0baace3fd --- /dev/null +++ 
b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page-transition.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2009, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_PAGE_TRANSITION_H +#define POPPLER_PAGE_TRANSITION_H + +#include "poppler-global.h" + +class Object; + +namespace poppler +{ + +class page; +class page_transition_private; + +class POPPLER_CPP_EXPORT page_transition +{ +public: + enum type_enum { + replace = 0, + split, + blinds, + box, + wipe, + dissolve, + glitter, + fly, + push, + cover, + uncover, + fade + }; + + enum alignment_enum { + horizontal = 0, + vertical + }; + + enum direction_enum { + inward = 0, + outward + }; + + page_transition(const page_transition &pt); + ~page_transition(); + + type_enum type() const; + int duration() const; + alignment_enum alignment() const; + direction_enum direction() const; + int angle() const; + double scale() const; + bool is_rectangular() const; + + page_transition& operator=(const page_transition &pt); + +private: + page_transition(Object *params); + + page_transition_private *d; + friend class page; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-page.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page.h new file mode 100644 index 
0000000000000000000000000000000000000000..a7dcc872fbc9cd2f32dbfb9754996a57a3aea50a --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-page.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2009-2010, Pino Toscano + * Copyright (C) 2018, Suzuki Toshiya + * Copyright (C) 2018, Albert Astals Cid + * Copyright (C) 2018, Zsombor Hollay-Horvath + * Copyright (C) 2018, Aleksey Nikolaev + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_PAGE_H +#define POPPLER_PAGE_H + +#include "poppler-global.h" +#include "poppler-rectangle.h" + +#include + +namespace poppler +{ + +struct text_box_data; +class POPPLER_CPP_EXPORT text_box +{ + friend class page; +public: + text_box(text_box&&); + text_box& operator=(text_box&&); + + ~text_box(); + + ustring text() const; + rectf bbox() const; + + /** + \since 0.68 + */ + int rotation() const; + + /** + Get a bbox for the i-th glyph + + This method returns a rectf of the bounding box for + the i-th glyph in the text_box. + + \note The text_box object owns the rectf objects, + the caller does not need to free them. + + \warning For a glyph index that is too large, rectf(0,0,0,0) + is returned. The number of the glyphs and ustring + codepoints might be different in some complex scripts. 
+ */ + rectf char_bbox(size_t i) const; + bool has_space_after() const; +private: + text_box(text_box_data *data); + + std::unique_ptr m_data; +}; + +class document; +class document_private; +class page_private; +class page_transition; + +class POPPLER_CPP_EXPORT page : public poppler::noncopyable +{ +public: + enum orientation_enum { + landscape, + portrait, + seascape, + upside_down + }; + enum search_direction_enum { + search_from_top, + search_next_result, + search_previous_result + }; + enum text_layout_enum { + physical_layout, + raw_order_layout + }; + + ~page(); + + orientation_enum orientation() const; + double duration() const; + rectf page_rect(page_box_enum box = crop_box) const; + ustring label() const; + + page_transition* transition() const; + + bool search(const ustring &text, rectf &r, search_direction_enum direction, + case_sensitivity_enum case_sensitivity, rotation_enum rotation = rotate_0) const; + ustring text(const rectf &rect = rectf()) const; + ustring text(const rectf &rect, text_layout_enum layout_mode) const; + + /** + Returns a list of text of the page + + This method returns a std::vector of text_box that contain all + the text of the page, with roughly one word of text + per text_box item. + + For text written in western languages (left-to-right and + top-to-bottom), the std::vector contains the text in the proper + order. + + \since 0.63 + + \note The page object owns the text_box objects as unique_ptr, + the caller does not need to free them. 
+ + \warning This method is not tested with Asian scripts + */ + std::vector text_list() const; + +private: + page(document_private *doc, int index); + + page_private *d; + friend class page_private; + friend class document; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-rectangle.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-rectangle.h new file mode 100644 index 0000000000000000000000000000000000000000..365d07cf1c3496625c185d0d4269e19e4e859d5b --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-rectangle.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2009-2010, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef POPPLER_RECTANGLE_H +#define POPPLER_RECTANGLE_H + +#include "poppler-global.h" + +namespace poppler +{ + +template class rectangle +{ +public: + rectangle() + : x1(), y1(), x2(), y2() + {} + rectangle(T _x, T _y, T w, T h) + : x1(_x), y1(_y), x2(x1 + w), y2(y1 + h) + {} + ~rectangle() + {} + + bool is_empty() const + { return (x1 == x2) && (y1 == y2); } + + T x() const + { return x1; } + + T y() const + { return y1; } + + T width() const + { return x2 - x1; } + + T height() const + { return y2 - y1; } + + T left() const + { return x1; } + T top() const + { return y1; } + T right() const + { return x2; } + T bottom() const + { return y2; } + + void set_left(T value) + { x1 = value; } + void set_top(T value) + { y1 = value; } + void set_right(T value) + { x2 = value; } + void set_bottom(T value) + { y2 = value; } + +private: + T x1, y1, x2, y2; +}; + +typedef rectangle rect; +typedef rectangle rectf; + +POPPLER_CPP_EXPORT std::ostream& operator<<(std::ostream& stream, const rect &r); +POPPLER_CPP_EXPORT std::ostream& operator<<(std::ostream& stream, const rectf &r); + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-toc.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-toc.h new file mode 100644 index 0000000000000000000000000000000000000000..8b09736fbbdbe2fdd7fbfd9d501e22444f9c9997 --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-toc.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2009, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_TOC_H +#define POPPLER_TOC_H + +#include "poppler-global.h" + +#include + +namespace poppler +{ + +class toc_private; +class toc_item; +class toc_item_private; + +class POPPLER_CPP_EXPORT toc : public poppler::noncopyable +{ +public: + ~toc(); + + toc_item* root() const; + +private: + toc(); + + toc_private *d; + + friend class toc_private; +}; + + +class POPPLER_CPP_EXPORT toc_item : public poppler::noncopyable +{ +public: + typedef std::vector::const_iterator iterator; + + ~toc_item(); + + ustring title() const; + bool is_open() const; + + std::vector children() const; + iterator children_begin() const; + iterator children_end() const; + +private: + toc_item(); + + toc_item_private *d; + friend class toc; + friend class toc_private; + friend class toc_item_private; +}; + +} + +#endif diff --git a/lib/poppler-0.68.0/include/poppler/cpp/poppler-version.h b/lib/poppler-0.68.0/include/poppler/cpp/poppler-version.h new file mode 100644 index 0000000000000000000000000000000000000000..d7eeb5e6b13e3ddc72a5eaf8da3d19fc414571dc --- /dev/null +++ b/lib/poppler-0.68.0/include/poppler/cpp/poppler-version.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2009, Pino Toscano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef POPPLER_VERSION_H +#define POPPLER_VERSION_H + +#include "poppler-global.h" + +#define POPPLER_VERSION "0.68.0" +#define POPPLER_VERSION_MAJOR 0 +#define POPPLER_VERSION_MINOR 68 +#define POPPLER_VERSION_MICRO 0 + +namespace poppler +{ + +POPPLER_CPP_EXPORT std::string version_string(); +POPPLER_CPP_EXPORT unsigned int version_major(); +POPPLER_CPP_EXPORT unsigned int version_minor(); +POPPLER_CPP_EXPORT unsigned int version_micro(); + +} + +#endif diff --git a/lib/poppler-0.68.0/lib/libpoppler-cpp.dll.a b/lib/poppler-0.68.0/lib/libpoppler-cpp.dll.a new file mode 100644 index 0000000000000000000000000000000000000000..3898336d2c56734aeeba25055dd0d00bf45a7be8 Binary files /dev/null and b/lib/poppler-0.68.0/lib/libpoppler-cpp.dll.a differ diff --git a/lib/poppler-0.68.0/lib/libpoppler.dll.a b/lib/poppler-0.68.0/lib/libpoppler.dll.a new file mode 100644 index 0000000000000000000000000000000000000000..1f9d6227fca0e5c2f5d54cfd421bcf82b36621b2 --- /dev/null +++ b/lib/poppler-0.68.0/lib/libpoppler.dll.a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8c74fd0b881b8bb63341bb36019ea56656831a9037d4b7fdd4cc8012fc8afa +size 3620688 diff --git a/lib/poppler-0.68.0/lib/pkgconfig/poppler-cairo.pc b/lib/poppler-0.68.0/lib/pkgconfig/poppler-cairo.pc new file mode 100644 index 0000000000000000000000000000000000000000..7267a7eba71c337996a22365d482724e43452313 --- /dev/null +++ b/lib/poppler-0.68.0/lib/pkgconfig/poppler-cairo.pc @@ -0,0 +1,8 @@ +prefix=C:/temp/poppler-install-dir/x86/poppler-0.69.0 +libdir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/lib +includedir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/include + +Name: poppler-cairo +Description: Cairo backend for Poppler PDF rendering library +Version: 0.68.0 
+Requires: poppler = 0.68.0 cairo >= 1.10.0 diff --git a/lib/poppler-0.68.0/lib/pkgconfig/poppler-cpp.pc b/lib/poppler-0.68.0/lib/pkgconfig/poppler-cpp.pc new file mode 100644 index 0000000000000000000000000000000000000000..41e1be6fa8a165e684e92b51fa68fc65ce5f8fb2 --- /dev/null +++ b/lib/poppler-0.68.0/lib/pkgconfig/poppler-cpp.pc @@ -0,0 +1,12 @@ +prefix=C:/temp/poppler-install-dir/x86/poppler-0.69.0 +libdir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/lib +includedir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/include + +Name: poppler-cpp +Description: cpp backend for Poppler PDF rendering library +Version: 0.68.0 +Requires: +Requires.private: poppler = 0.68.0 + +Libs: -L${libdir} -lpoppler-cpp +Cflags: -I${includedir}/poppler/cpp diff --git a/lib/poppler-0.68.0/lib/pkgconfig/poppler-splash.pc b/lib/poppler-0.68.0/lib/pkgconfig/poppler-splash.pc new file mode 100644 index 0000000000000000000000000000000000000000..efcc2ab3e78c42cb78000885fa317bfdc18dfce6 --- /dev/null +++ b/lib/poppler-0.68.0/lib/pkgconfig/poppler-splash.pc @@ -0,0 +1,8 @@ +prefix=C:/temp/poppler-install-dir/x86/poppler-0.69.0 +libdir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/lib +includedir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/include + +Name: poppler-splash +Description: Splash backend for Poppler PDF rendering library +Version: 0.68.0 +Requires: poppler = 0.68.0 diff --git a/lib/poppler-0.68.0/lib/pkgconfig/poppler.pc b/lib/poppler-0.68.0/lib/pkgconfig/poppler.pc new file mode 100644 index 0000000000000000000000000000000000000000..c0c71d5ac4f9f09f37b323be35085ddfcefdde01 --- /dev/null +++ b/lib/poppler-0.68.0/lib/pkgconfig/poppler.pc @@ -0,0 +1,10 @@ +prefix=C:/temp/poppler-install-dir/x86/poppler-0.69.0 +libdir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/lib +includedir=C:/temp/poppler-install-dir/x86/poppler-0.69.0/include + +Name: poppler +Description: PDF rendering library +Version: 0.68.0 + +Libs: -L${libdir} -lpoppler +Cflags: -I${includedir}/poppler diff --git 
a/lib/poppler-0.68.0/share/man/man1/pdfdetach.1 b/lib/poppler-0.68.0/share/man/man1/pdfdetach.1 new file mode 100644 index 0000000000000000000000000000000000000000..525173ed134ef7402bb104746bedfe38358f0bf0 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdfdetach.1 @@ -0,0 +1,89 @@ +.\" Copyright 2011 Glyph & Cog, LLC +.TH pdfdetach 1 "15 August 2011" +.SH NAME +pdfdetach \- Portable Document Format (PDF) document embedded file +extractor (version 3.03) +.SH SYNOPSIS +.B pdfdetach +[options] +.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdfdetach +lists or extracts embedded files (attachments) from a Portable +Document Format (PDF) file. +.SH OPTIONS +Some of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.B \-list +List all of the embedded files in the PDF file. File names are +converted to the text encoding specified by the "\-enc" switch. +.TP +.BI \-save " number" +Save the specified embedded file. By default, this uses the file name +associated with the embedded file (as printed by the "\-list" switch); +the file name can be changed with the "\-o" switch. +.TP +.BI \-saveall +Save all of the embedded files. This uses the file names associated +with the embedded files (as printed by the "\-list" switch). By +default, the files are saved in the current directory; this can be +changed with the "\-o" switch. +.TP +.BI \-o " path" +Set the file name used when saving an embedded file with the "\-save" +switch, or the directory used by "\-saveall". +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output (embedded file names). +This defaults to "UTF-8". +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-v +Print copyright and version information. 
+.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfdetach software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdffonts.1 b/lib/poppler-0.68.0/share/man/man1/pdffonts.1 new file mode 100644 index 0000000000000000000000000000000000000000..45d1e9726a6c5bc8ee893614ac9f7e01fb7378be --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdffonts.1 @@ -0,0 +1,124 @@ +.\" Copyright 1999-2011 Glyph & Cog, LLC +.TH pdffonts 1 "15 August 2011" +.SH NAME +pdffonts \- Portable Document Format (PDF) font analyzer (version +3.03) +.SH SYNOPSIS +.B pdffonts +[options] +.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdffonts +lists the fonts used in a Portable Document Format (PDF) file along +with various information for each font.
+.PP +The following information is listed for each font: +.TP +.B name +the font name, exactly as given in the PDF file (potentially including +a subset prefix) +.TP +.B type +the font type \(en see below for details +.TP +.B encoding +the font encoding +.TP +.B emb +"yes" if the font is embedded in the PDF file +.TP +.B sub +"yes" if the font is a subset +.TP +.B uni +"yes" if there is an explicit "ToUnicode" map in the PDF file (the +absence of a ToUnicode map doesn't necessarily mean that the text +can't be converted to Unicode) +.TP +.B object ID +the font dictionary object ID (number and generation) +.PP +PDF files can contain the following types of fonts: +.PP +.RS +Type 1 +.RE +.RS +Type 1C \(en aka Compact Font Format (CFF) +.RE +.RS +Type 3 +.RE +.RS +TrueType +.RE +.RS +CID Type 0 \(en 16-bit font with no specified type +.RE +.RS +CID Type 0C \(en 16-bit PostScript CFF font +.RE +.RS +CID TrueType \(en 16-bit TrueType font +.RE +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to analyze. +.TP +.BI \-l " number" +Specifies the last page to analyze. +.TP +.B \-subst +List the substitute fonts that poppler will use for non embedded fonts. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdffonts software and documentation are copyright 1996\(en2011 Glyph +& Cog, LLC. 
+.SH "SEE ALSO" +.nh +.ad l +.BR pdfdetach (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdfimages.1 b/lib/poppler-0.68.0/share/man/man1/pdfimages.1 new file mode 100644 index 0000000000000000000000000000000000000000..ee270d38594388c54953b93da502061bd068eb3f --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdfimages.1 @@ -0,0 +1,263 @@ +.\" Copyright 1998-2011 Glyph & Cog, LLC +.TH pdfimages 1 "15 August 2011" +.SH NAME +pdfimages \- Portable Document Format (PDF) image extractor +(version 3.03) +.SH SYNOPSIS +.B pdfimages +[options] +.I PDF-file image-root +.SH DESCRIPTION +.B Pdfimages +saves images from a Portable Document Format (PDF) file as Portable +Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), +Tagged Image File Format (TIFF), JPEG, JPEG2000, or JBIG2 files. +.PP +Pdfimages reads the PDF file +.IR PDF-file , +scans one or more pages, and writes one file for each image, +.IR image-root - nnn . xxx , +where +.I nnn +is the image number and +.I xxx +is the image type (.ppm, .pbm, .png, .tif, .jpg, jp2, jb2e, or jb2g). +.PP +The default output format is PBM (for monochrome images) or PPM for +non-monochrome. The \-png or \-tiff options change to default output +to PNG or TIFF respectively. If both \-png and \-tiff are specified, +CMYK images will be written as TIFF and all other images will be +written as PNG. In addition the \-j, \-jp2, and \-jbig2 options will +cause JPEG, JPEG2000, and JBIG2, respectively, images in the PDF file +to be written in their native format. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to scan. +.TP +.BI \-l " number" +Specifies the last page to scan. +.TP +.B \-png +Change the default output format to PNG. +.TP +.B \-tiff +Change the default output format to TIFF. 
+.TP +.B \-j +Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF. +.TP +.B \-jp2 +Write images in JPEG2000 format as JP2 files instead of the default format. The JP2 file is identical to the JPEG2000 data stored in the PDF. +.TP +.B \-jbig2 +Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is identical to the JBIG2 data in the PDF. +.TP +.B \-ccitt +Write images in CCITT format as CCITT files instead of the default +format. The CCITT file is identical to the CCITT data stored in the +PDF. PDF files contain additional parameters specifying +how to decode the CCITT data. These parameters are translated to +fax2tiff input options and written to a .params file with the same image +number. The parameters are: +.RS +.TP +.B \-1 +1D Group 3 encoding +.TP +.B \-2 +2D Group 3 encoding +.TP +.B \-4 +Group 4 encoding +.TP +.B \-A +Beginning of line is aligned on a byte boundary +.TP +.B \-P +Beginning of line is not aligned on a byte boundary +.TP +.B \-X n +The image width in pixels +.TP +.B \-W +Encoding uses 1 for black and 0 for white +.TP +.B \-B +Encoding uses 0 for black and 1 for white +.TP +.B \-M +Input data fills from most significant bit to least significant bit. +.RE +.TP +.B \-all +Write JPEG, JPEG2000, JBIG2, and CCITT images in their native format. CMYK files are written as TIFF files. All other images are written as PNG files. +This is equivalent to specifying the options \-png \-tiff \-j \-jp2 \-jbig2 \-ccitt. +.TP +.B \-list +Instead of writing the images, list the images along with various information for each image. 
Do not specify an +.IR image-root +with this option. +.IP +The following information is listed for each image: +.RS +.TP +.B page +the page number containing the image +.TP +.B num +the image number +.TP +.B type +the image type: +.PP +.RS +image - an opaque image +.RE +.RS +mask - a monochrome mask image +.RE +.RS +smask - a soft-mask image +.RE +.RS +stencil - a monochrome mask image used for painting a color or pattern +.RE +.PP +Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask. +The mask/smask used as part of a transparent image always immediately follows the image in the image list. +.TP +.B width +image width (in pixels) +.TP +.B height +image height (in pixels) +.PP +Note: the image width/height is the size of the embedded image, not the size the image will be rendered at. +.TP +.B color +image color space: +.PP +.RS +gray - Gray +.RE +.RS +rgb - RGB +.RE +.RS +cmyk - CMYK +.RE +.RS +lab - L*a*b +.RE +.RS +icc - ICC Based +.RE +.RS +index - Indexed Color +.RE +.RS +sep - Separation +.RE +.RS +devn - DeviceN +.RE +.TP +.B comp +number of color components +.TP +.B bpc +bits per component +.TP +.B enc +encoding: +.PP +.RS +image - raster image (may be Flate or LZW compressed but does not use an image encoding) +.RE +.RS +jpeg - Joint Photographic Experts Group +.RE +.RS +jp2 - JPEG2000 +.RE +.RS +jbig2 - Joint Bi-Level Image Experts Group +.RE +.RS +ccitt - CCITT Group 3 or Group 4 Fax +.RE +.TP +.B interp +"yes" if the interpolation is to be performed when scaling up the image +.TP +.B object ID +the image dictionary object ID (number and generation) +.TP +.B x\-ppi +The horizontal resolution of the image (in pixels per inch) when rendered on the pdf page. +.TP +.B y\-ppi +The vertical resolution of the image (in pixels per inch) when rendered on the pdf page. +.TP +.B size +The size of the embedded image in the pdf file.
The following suffixes are used: 'B' bytes, 'K' kilobytes, 'M' megabytes, and 'G' gigabytes. +.TP +.B ratio +The compression ratio of the embedded image. +.RE +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-p +Include page numbers in output file names. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfimages software and documentation are copyright 1998-2011 Glyph +& Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdfinfo.1 b/lib/poppler-0.68.0/share/man/man1/pdfinfo.1 new file mode 100644 index 0000000000000000000000000000000000000000..b699dff1c81fd189ef8773490def364c1a89b721 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdfinfo.1 @@ -0,0 +1,173 @@ +.\" Copyright 1999-2011 Glyph & Cog, LLC +.TH pdfinfo 1 "15 August 2011" +.SH NAME +pdfinfo \- Portable Document Format (PDF) document information +extractor (version 3.03) +.SH SYNOPSIS +.B pdfinfo +[options] +.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdfinfo +prints the contents of the \'Info' dictionary (plus some other useful +information) from a Portable Document Format (PDF) file. 
+.PP +The \'Info' dictionary contains the following values: +.PP +.RS +title +.RE +.RS +subject +.RE +.RS +keywords +.RE +.RS +author +.RE +.RS +creator +.RE +.RS +producer +.RE +.RS +creation date +.RE +.RS +modification date +.RE +.PP +In addition, the following information is printed: +.PP +.RS +tagged (yes/no) +.RE +.RS +form (AcroForm / XFA / none) +.RE +.RS +javascript (yes/no) +.RE +.RS +page count +.RE +.RS +encrypted flag (yes/no) +.RE +.RS +print and copy permissions (if encrypted) +.RE +.RS +page size +.RE +.RS +file size +.RE +.RS +linearized (yes/no) +.RE +.RS +PDF version +.RE +.RS +metadata (only if requested) +.RE +.PP +The options \-listenc, \-meta, \-js, \-struct, and \-struct-text only print the requested information. The 'Info' dictionary and related data listed above is not printed. At most one of these five options may be used. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to examine. If multiple pages are requested +using the "\-f" and "\-l" options, the size of each requested page (and, +optionally, the bounding boxes for each requested page) are printed. +Otherwise, only page one is examined. +.TP +.BI \-l " number" +Specifies the last page to examine. +.TP +.B \-box +Prints the page box bounding boxes: MediaBox, CropBox, BleedBox, +TrimBox, and ArtBox. +.TP +.B \-meta +Prints document-level metadata. (This is the "Metadata" stream from +the PDF file's Catalog object.) +.TP +.B \-js +Prints all JavaScript in the PDF. +.TP +.B \-struct +Prints the logical document structure of a Tagged-PDF file. +.TP +.B \-struct-text +Print the textual content along with the document structure of a Tagged-PDF +file. Note that extracting text this way might be slow for big PDF files. +(Implies +.BR \-struct .) +.TP +.B \-isodates +Prints dates in ISO-8601 format (including the time zone). +.TP +.B \-rawdates +Prints the raw (undecoded) date strings, directly from the PDF file. +.TP +.B \-dests +Print a list of all named destinations. 
If a page range is specified using "\-f" and "\-l", only +destinations in the page range are listed. +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. This defaults to "UTF-8". +.TP +.B \-listenc +Lists the available encodings. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfinfo software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdfseparate.1 b/lib/poppler-0.68.0/share/man/man1/pdfseparate.1 new file mode 100644 index 0000000000000000000000000000000000000000..132511a12c87ad5c071dca31002c5db4bd8f15d2 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdfseparate.1 @@ -0,0 +1,60 @@ +.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org +.TH pdfseparate 1 "15 September 2011" +.SH NAME +pdfseparate \- Portable Document Format (PDF) page extractor +.SH SYNOPSIS +.B pdfseparate +[options] +.I PDF-file PDF-page-pattern +.SH DESCRIPTION +.B pdfseparate +extracts single pages from a Portable Document Format (PDF) file. +.PP +pdfseparate reads the PDF file +.IR PDF-file , +extracts one or more pages, and writes one PDF file for each page to +.IR PDF-page-pattern.
+.PP +PDF-page-pattern should contain +.BR %d +(or any variant respecting printf format), since %d is replaced by the page number. +.TP +The PDF-file should not be encrypted. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to extract. If \-f is omitted, extraction starts with page 1. +.TP +.BI \-l " number" +Specifies the last page to extract. If \-l is omitted, extraction ends with the last page. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXAMPLE +pdfseparate sample.pdf sample-%d.pdf +.TP +extracts all pages from sample.pdf, if i.e. sample.pdf has 3 pages, it produces +.TP +sample-1.pdf, sample-2.pdf, sample-3.pdf +.SH AUTHOR +The pdfseparate software and documentation are copyright 1996-2004 Glyph +& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdftocairo.1 b/lib/poppler-0.68.0/share/man/man1/pdftocairo.1 new file mode 100644 index 0000000000000000000000000000000000000000..8de2392584bd5f77281181e76f0664017d573e7e --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdftocairo.1 @@ -0,0 +1,346 @@ +.TH pdftocairo 1 +.SH NAME +pdftocairo \- Portable Document Format (PDF) to PNG/JPEG/TIFF/PDF/PS/EPS/SVG using cairo +.SH SYNOPSIS +.B pdftocairo +[options] +.IR PDF-file +.RI [ output-file ] +.SH DESCRIPTION +.B pdftocairo +converts Portable Document Format (PDF) files, using the cairo output device of the poppler PDF library, to any of the following output formats: +.IP \(bu +Portable Network Graphics (PNG) +.IP \(bu +JPEG Interchange Format (JPEG) +.IP \(bu +Tagged Image File Format (TIFF) +.IP \(bu +Portable Document Format (PDF) +.IP 
\(bu +PostScript (PS) +.IP \(bu +Encapsulated PostScript (EPS) +.IP \(bu +Scalable Vector Graphics (SVG) +.IP \(bu +Windows Printer +.PP +.B pdftocairo +reads the PDF file, +.IR PDF-file , +and writes to +.IR output-file . +The image formats (PNG, JPEG, and TIFF) generate one file per page with the page number and file type appended to +.IR output-file . +When \-singlefile is used with the image formats, the file type is appended to +.IR output-file . +When the output format is a vector format (PDF, PS, EPS, and SVG), +.IR output-file +is the full filename. + +If the +.IR PDF-file +is \*(lq\-\*(rq , the PDF is read from stdin. +If the +.IR output-file +is \*(lq\-\*(rq , the output file will be written to stdout. Using stdout is not valid with image formats unless \-singlefile is used. +If +.IR output-file +is not used, the output filename will be derived from the +.IR PDF-file +filename. +.PP +Not all options are valid with all output formats. One (and only one) of the output format options (\-png, \-jpeg, \-tiff, \-pdf, \-print, \-ps, \-eps, or \-svg) must be used. +.PP +The resolution options (\-r, \-rx, \-ry) set the resolution of the +image output formats. The image dimensions will depend on the PDF page +size and the resolution. For the vector outputs, regions of the page +that can not be represented natively in the output format (eg +translucency in PS) will be rasterized at the resolution specified by +the resolution options. +.PP +The \-scale-to options may be used to set a fixed image size. The +image resolution will vary with the page size. +.PP +The cropping options (\-x, \-y, \-W, and \-H) use units of pixels with +the image formats and PostScript points (1/72 inch) with the vector +formats. When cropping is used with vector output the cropped region is +centered unless \-nocenter is used in which case the cropped region is +at the top left (SVG) or bottom left (PDF, PS, EPS). 
+.PP +.SH OPTIONS +.TP +.BI \-png +Generates a PNG file(s) +.TP +.BI \-jpeg +Generates a JPEG file(s). See also \-jpegopt. +.TP +.BI \-tiff +Generates a TIFF file(s) +.TP +.BI \-pdf +Generates a PDF file +.TP +.BI \-ps +Generate a PS file +.TP +.BI \-eps +Generate an EPS file. An EPS file contains a single image, so if you +use this option with a multi-page PDF file, you must use \-f and \-l +to specify a single page. The page size options (\-origpagesizes, +\-paper, \-paperw, \-paperh) can not be used with this option. +.TP +.BI \-svg +Generate a SVG (Scalable Vector Graphics) file +.TP +.BI \-print +(Windows only) Prints to a system printer. See also \-printer and \-printopt. + If an output file is not specified, the output will be sent to the printer. + The output file '-' can not be used with this option. +.TP +.BI \-printdlg +(Windows only) Prints to a system printer. Displays the print dialog to allow +the print options to be modified before printing. +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.B \-o +Generates only the odd numbered pages. +.TP +.B \-e +Generates only the even numbered pages. +.TP +.BI \-singlefile +Writes only the first page and does not add digits. +.TP +.BI \-r " number" +Specifies the X and Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. +.TP +.BI \-rx " number" +Specifies the X resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. +.TP +.BI \-ry " number" +Specifies the Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. +.TP +.BI \-scale-to " number" +Scales the long side of each page (width for landscape pages, height +for portrait pages) to fit in scale-to pixels. The size of the short +side will be determined by the aspect ratio of the page (PNG/JPEG/TIFF only).
+.TP +.BI \-scale-to-x " number" +Scales each page horizontally to fit in scale-to-x pixels. If +scale-to-y is set to -1, the vertical size will be determined by the +aspect ratio of the page (PNG/JPEG/TIFF only). +.TP +.BI \-scale-to-y " number" +Scales each page vertically to fit in scale-to-y pixels. If scale-to-x +is set to -1, the horizontal size will be determined by the aspect ratio +of the page (PNG/JPEG/TIFF only). +.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner in pixels (image output) or points (vector output) +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner in pixels (image output) or points (vector output) +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (image output) or points (vector output) (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (image output) or points (vector output) (default is 0) +.TP +.BI \-sz " number" +Specifies the size of crop square in pixels (image output) or points (vector output) (sets \-W and \-H) +.TP +.B \-cropbox +Uses the crop box rather than media box when generating the files (PNG/JPEG/TIFF only) +.TP +.B \-mono +Generate a monochrome file (PNG and TIFF only). +.TP +.B \-gray +Generate a grayscale file (PNG, JPEG, and TIFF only). +.TP +.B \-antialias +Set the cairo antialias option used for text and drawing in image files (or rasterized regions in vector output). The options are: +.RS +.TP +.B default +Use the default antialiasing for the target device. This is the default setting if \-antialias is not used. +.TP +.B none +Antialiasing is disabled. +.TP +.B gray +Perform single-color antialiasing using shades of gray. +.TP +.B subpixel +Perform antialiasing by taking advantage of the order of subpixel elements on devices such as LCD. +.TP +.B fast +Hint that the backend should perform some antialiasing but prefer speed over quality.
+.TP +.B good +The backend should balance quality against performance. +.TP +.B best +Hint that the backend should render at the highest quality, sacrificing speed if necessary. +.RE +.TP +.B \-transp +Use a transparent page color instead of white (PNG and TIFF only). +.TP +.BI \-icc " icc-file" +Use the specified ICC file as the output profile (PNG only). The profile will be embedded in the PNG file. +.TP +.BI \-jpegopt " jpeg-options" +When used with \-jpeg, takes a list of options to control the jpeg compression. See +.B JPEG OPTIONS +for the available options. +.TP +.B \-level2 +Generate Level 2 PostScript (PS only). +.TP +.B \-level3 +Generate Level 3 PostScript (PS only). This enables all Level 2 features plus +shading patterns and masked images. This is the default setting. +.TP +.B \-origpagesizes +This option is the same as "\-paper match". +.TP +.BI \-paper " size" +Set the paper size to one of "letter", "legal", "A4", or "A3" +(PS,PDF,SVG only). This can also be set to "match", which will set +the paper size of each page to match the size specified in the PDF +file. If none of the \-paper, \-paperw, or \-paperh options are +specified, the default is to match the paper size. +.TP +.BI \-paperw " size" +Set the paper width, in points (PS,PDF,SVG only). +.TP +.BI \-paperh " size" +Set the paper height, in points (PS,PDF,SVG only). +.TP +.B \-nocrop +By default, printing output is cropped to the CropBox specified in the PDF +file. This option disables cropping (PS,PDF,SVG only). +.TP +.B \-expand +Expand PDF pages smaller than the paper to fill the paper (PS,PDF,SVG only). By +default, these pages are not scaled. +.TP +.B \-noshrink +Don't scale PDF pages which are larger than the paper (PS,PDF,SVG only). By default, +pages larger than the paper are shrunk to fit. +.TP +.B \-nocenter +By default, PDF pages smaller than the paper (after any scaling) are +centered on the paper. 
This option causes them to be aligned to the +lower-left corner of the paper instead (PS,PDF,SVG only). +.TP +.B \-duplex +Adds the %%IncludeFeature: *Duplex DuplexNoTumble DSC comment to the +PostScript file (PS only). This tells the print manager to enable duplexing. +.TP +.BI \-printer " printer-name" +(Windows only). When used with \-print, specifies the name of the printer to be used, instead of the system default. +.TP +.BI \-printopt " printer-options" +(Windows only). When used with \-print, takes a list of options to be used to configure the printer. See +.B WINDOWS PRINTER OPTIONS +for the available options. +.TP +.BI \-setupdlg +(Windows only). When used with \-print, the printer properties dialog is displayed +allowing the print settings to be modified before printing. The paper size selected +in the print properties dialog will be used except when -origpagesizes is specified. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The poppler tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +4 +Error related to ICC profile. +.TP +99 +Other error. +.SH JPEG OPTIONS +When JPEG output is specified, the \-jpegopt option can be used to control the JPEG compression parameters. +It takes a string of the form "<opt>=<val>[,<opt>=<val>]". Currently the available options are: +.TP +.BI quality +Selects the JPEG quality value. The value must be an integer between 0 and 100. +.TP +.BI progressive +Select progressive JPEG output. 
The possible values are "y", "n", +indicating progressive (yes) or non-progressive (no), respectively. +.TP +.BI optimize +Sets whether to compute optimal Huffman coding tables for the JPEG output, which +will create smaller files but make an extra pass over the data. The value must +be "y" or "n", with "y" performing optimization, otherwise the default Huffman +tables are used. +.SH WINDOWS PRINTER OPTIONS +In Windows, you can use the \-print option to print directly to a system printer. Additionally, you can use the \-printopt +option to configure the printer. It takes a string of the form "<opt>=<val>[,<opt>=<val>]". Currently the available options are: +.TP +.BI source +Selects the source paper tray to be used (bin). The possible values are "upper", "onlyone", "lower", "middle", "manual", "envelope", +"envmanual", "auto", "tractor", "smallfmt", "largefmt", "largecapacity", "formsource", or a numeric value to choose a driver specific source. +.TP +.BI duplex +Sets the duplex mode of the printer. The possible values are "off", "short" or "long", +indicating no duplexing, short-edge binding, or long-edge binding, respectively. +General option \-duplex is a synonym of "duplex=long". If both options are specified, +\-printopt has priority. +.SH AUTHOR +The pdftocairo software and documentation are copyright 1996-2004 Glyph +& Cog, LLC and copyright 2005-2011 The Poppler Developers. 
+.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdftohtml.1 b/lib/poppler-0.68.0/share/man/man1/pdftohtml.1 new file mode 100644 index 0000000000000000000000000000000000000000..5d711ba91f59f2b4fbb67b260e271213bec1344f --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdftohtml.1 @@ -0,0 +1,113 @@ +.TH PDFTOHTML 1 +.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection +.\" other parms are allowed: see man(7), man(1) +.SH NAME +pdftohtml \- program to convert PDF files into HTML, XML and PNG images +.SH SYNOPSIS +.B pdftohtml +.I "[options] <PDF-file> [<html-file> <xml-file>]" +.SH "DESCRIPTION" +This manual page briefly documents the +.BR pdftohtml +command. +This manual page was written for the Debian GNU/Linux distribution +because the original program does not have a manual page. +.PP +.B pdftohtml +is a program that converts PDF documents into HTML. It generates its output in +the current working directory. +.SH OPTIONS +A summary of options is included below. +.TP +.B \-h, \-help +Show summary of options. +.TP +.B \-f +first page to print +.TP +.B \-l +last page to print +.TP +.B \-q +do not print any messages or errors +.TP +.B \-v +print copyright and version info +.TP +.B \-p +exchange .pdf links with .html +.TP +.B \-c +generate complex output +.TP +.B \-s +generate single HTML that includes all pages +.TP +.B \-i +ignore images +.TP +.B \-noframes +generate no frames. Not supported in complex output mode. 
+.TP +.B \-stdout +use standard output +.TP +.B \-zoom +zoom the PDF document (default 1.5) +.TP +.B \-xml +output for XML post-processing +.TP +.B \-noRoundedCoordinates +do not round coordinates (with XML output only) +.TP +.B \-enc +output text encoding name +.TP +.B \-opw +owner password (for encrypted files) +.TP +.B \-upw +user password (for encrypted files) +.TP +.B \-hidden +force hidden text extraction +.TP +.B \-fmt +image file format for Splash output (png or jpg). +If complex is selected, but \-fmt is not specified, +\-fmt png will be assumed +.TP +.B \-nomerge +do not merge paragraphs +.TP +.B \-nodrm +override document DRM settings +.TP +.B \-wbt +adjust the word break threshold percent. Default is 10. +A word break occurs when the distance between two adjacent characters is +greater than this percent of the character height. +.TP +.B \-fontfullname +outputs the font name without any substitutions. + +.SH AUTHOR + +Pdftohtml was developed by Gueorgui Ovtcharov and Rainer Dorsch. It is +based on and benefits a lot from Derek Noonburg's xpdf package. + +This manual page was written by Søren Boll Overgaard , +for the Debian GNU/Linux system (but may be used by others). 
+.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdftoppm.1 b/lib/poppler-0.68.0/share/man/man1/pdftoppm.1 new file mode 100644 index 0000000000000000000000000000000000000000..75a4f473c72e5b71f94acbb948e03cd242cc2740 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdftoppm.1 @@ -0,0 +1,194 @@ +.\" Copyright 2005-2011 Glyph & Cog, LLC +.TH pdftoppm 1 "15 August 2011" +.SH NAME +pdftoppm \- Portable Document Format (PDF) to Portable Pixmap (PPM) +converter (version 3.03) +.SH SYNOPSIS +.B pdftoppm +[options] +.I PDF-file PPM-root +.SH DESCRIPTION +.B Pdftoppm +converts Portable Document Format (PDF) files to color image files in +Portable Pixmap (PPM) format, grayscale image files in Portable +Graymap (PGM) format, or monochrome image files in Portable Bitmap +(PBM) format. +.PP +Pdftoppm reads the PDF file, +.IR PDF-file , +and writes one PPM file for each page, +.IR PPM-root - number .ppm, +where +.I number +is the page number. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.B \-o +Generates only the odd numbered pages. +.TP +.B \-e +Generates only the even numbered pages. +.TP +.BI \-singlefile +Writes only the first page and does not add digits. +.TP +.BI \-r " number" +Specifies the X and Y resolution, in DPI. The default is 150 DPI. +.TP +.BI \-rx " number" +Specifies the X resolution, in DPI. The default is 150 DPI. +.TP +.BI \-ry " number" +Specifies the Y resolution, in DPI. The default is 150 DPI. +.TP +.BI \-scale-to " number" +Scales the long side of each page (width for landscape pages, height +for portrait pages) to fit in scale-to pixels. The size of the short +side will be determined by the aspect ratio of the page. 
+.TP +.BI \-scale-to-x " number" +Scales each page horizontally to fit in scale-to-x pixels. If +scale-to-y is set to -1, the vertical size will be determined by the +aspect ratio of the page. +.TP +.BI \-scale-to-y " number" +Scales each page vertically to fit in scale-to-y pixels. If scale-to-x +is set to -1, the horizontal size will be determined by the aspect ratio +of the page. +.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (default is 0) +.TP +.BI \-sz " number" +Specifies the size of crop square in pixels (sets W and H) +.TP +.B \-cropbox +Uses the crop box rather than media box when generating the files +.TP +.B \-mono +Generate a monochrome PBM file (instead of a color PPM file). +.TP +.B \-gray +Generate a grayscale PGM file (instead of a color PPM file). +.TP +.B \-png +Generates a PNG file instead of a PPM file. +.TP +.B \-jpeg +Generates a JPEG file instead of a PPM file. +.TP +.BI \-jpegopt " jpeg-options" +When used with \-jpeg, takes a list of options to control the jpeg compression. See +.B JPEG OPTIONS +for the available options. +.TP +.B \-tiff +Generates a TIFF file instead of a PPM file. +.TP +.BI \-tiffcompression " none | packbits | jpeg | lzw | deflate" +Specifies the TIFF compression type. This defaults to "none". +.TP +.BI \-freetype " yes | no" +Enable or disable FreeType (a TrueType / Type 1 font rasterizer). +This defaults to "yes". +.TP +.BI \-thinlinemode " none | solid | shape" +Specifies the thin line mode. This defaults to "none". +.TP +"solid": +adjust lines with a width less than one pixel to pixel boundary +and paint it with a width of one pixel. 
+.TP +"shape": +adjust lines with a width less than one pixel to pixel boundary +and paint it with a width of one pixel but with a shape in proportion +to its width. +.TP +.BI \-aa " yes | no" +Enable or disable font anti-aliasing. This defaults to "yes". +.TP +.BI \-aaVector " yes | no" +Enable or disable vector anti-aliasing. This defaults to "yes". +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH JPEG OPTIONS +When JPEG output is specified, the \-jpegopt option can be used to control the JPEG compression parameters. +It takes a string of the form "<opt>=<val>[,<opt>=<val>]". Currently the available options are: +.TP +.BI quality +Selects the JPEG quality value. The value must be an integer between 0 and 100. +.TP +.BI progressive +Select progressive JPEG output. The possible values are "y", "n", +indicating progressive (yes) or non-progressive (no), respectively. +.TP +.BI optimize +Sets whether to compute optimal Huffman coding tables for the JPEG output, which +will create smaller files but make an extra pass over the data. The value must +be "y" or "n", with "y" performing optimization, otherwise the default Huffman +tables are used. +.SH AUTHOR +The pdftoppm software and documentation are copyright 1996-2011 Glyph +& Cog, LLC. 
+.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdftops.1 b/lib/poppler-0.68.0/share/man/man1/pdftops.1 new file mode 100644 index 0000000000000000000000000000000000000000..6907b824014e623c74966ee36b2166447d0b2c33 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdftops.1 @@ -0,0 +1,237 @@ +.\" Copyright 1996-2011 Glyph & Cog, LLC +.TH pdftops 1 "15 August 2011" +.SH NAME +pdftops \- Portable Document Format (PDF) to PostScript converter +(version 3.03) +.SH SYNOPSIS +.B pdftops +[options] +.RI < PDF-file > +.RI [< PS-file >] +.SH DESCRIPTION +.B Pdftops +converts Portable Document Format (PDF) files to PostScript so they +can be printed. +.PP +Pdftops reads the PDF file, +.IR PDF-file , +and writes a PostScript file, +.IR PS-file . +If +.I PS-file +is not specified, pdftops converts +.I file.pdf +to +.I file.ps +(or +.I file.eps +with the \-eps option). If +.I PS-file +is \'-', the PostScript is sent to stdout. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to print. +.TP +.BI \-l " number" +Specifies the last page to print. +.TP +.B \-level1 +Generate Level 1 PostScript. The resulting PostScript files will be +significantly larger (if they contain images), but will print on Level +1 printers. This also converts all images to black and white. No +more than one of the PostScript level options (\-level1, \-level1sep, +\-level2, \-level2sep, \-level3, \-level3sep) may be given. +.TP +.B \-level1sep +Generate Level 1 separable PostScript. All colors are converted to +CMYK. Images are written with separate stream data for the four +components. +.TP +.B \-level2 +Generate Level 2 PostScript. Level 2 supports color images and image +compression. This is the default setting. +.TP +.B \-level2sep +Generate Level 2 separable PostScript. 
All colors are converted to +CMYK. The PostScript separation convention operators are used to +handle custom (spot) colors. +.TP +.B \-level3 +Generate Level 3 PostScript. This enables all Level 2 features plus +CID font embedding. +.TP +.B \-level3sep +Generate Level 3 separable PostScript. The separation handling is the +same as for \-level2sep. +.TP +.B \-eps +Generate an Encapsulated PostScript (EPS) file. An EPS file contains +a single image, so if you use this option with a multi-page PDF file, +you must use \-f and \-l to specify a single page. No more than one of +the mode options (\-eps, \-form) may be given. +.TP +.B \-form +Generate a PostScript form which can be imported by software that +understands forms. A form contains a single page, so if you use this +option with a multi-page PDF file, you must use \-f and \-l to specify a +single page. The \-level1 option cannot be used with \-form. No more +than one of the mode options (\-eps, \-form) may be +given. +.TP +.B \-opi +Generate OPI comments for all images and forms which have OPI +information. (This option is only available if pdftops was compiled +with OPI support.) +.TP +.B \-binary +Write binary data in Level 1 PostScript. By default, pdftops writes +hex-encoded data in Level 1 PostScript. Binary data is non-standard +in Level 1 PostScript but reduces the file size and can be useful +when Level 1 PostScript is required only for its restricted use +of PostScript operators. +.TP +.BI \-r " number" +Set the resolution in DPI when pdftops rasterizes images with +transparencies or, for Level 1 PostScript, when pdftops +rasterizes images with color masks. +By default, pdftops rasterizes images to 300 DPI. +.TP +.B \-noembt1 +By default, any Type 1 fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead. Embedded fonts make PostScript files +larger, but may be necessary for readable output. 
+.TP +.B \-noembtt +By default, any TrueType fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead. Embedded fonts make PostScript files +larger, but may be necessary for readable output. Also, some +PostScript interpreters do not have TrueType rasterizers. +.TP +.B \-noembcidps +By default, any CID PostScript fonts which are embedded in the PDF +file are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +PostScript fonts. +.TP +.B \-noembcidtt +By default, any CID TrueType fonts which are embedded in the PDF file +are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +TrueType fonts. +.TP +.B \-passfonts +By default, references to non-embedded 8-bit fonts in the PDF file are +substituted with the closest "Helvetica", "Times-Roman", or "Courier" font. +This option passes references to non-embedded fonts +through to the PostScript file. +.TP +.BI \-aaRaster " yes | no" +Enable or disable raster anti-aliasing. This defaults to "no". +pdftops may need to rasterize transparencies and pattern image masks in the PDF. +If the PostScript will be printed, leave \-aaRaster disabled and set \-r to the resolution of the printer. +If the PostScript will be viewed, enabling \-aaRaster may make rasterized text easier to read. +.TP +.B \-optimizecolorspace +By default, bitmap images in the PDF pass through to the output PostScript +in their original color space, which produces predictable results. +This option converts RGB and CMYK images into Gray images +if every pixel of the image has equal components. +This can fix problems when doing color separations of PDFs +that contain embedded black and white images encoded as RGB. 
+.TP +.B \-preload +preload images and forms +.TP +.BI \-paper " size" +Set the paper size to one of "letter", "legal", "A4", or "A3". This +can also be set to "match", which will set the paper size of each page to match the +size specified in the PDF file. If none of the \-paper, \-paperw, or \-paperh +options are specified, the default is to match the paper size. +.TP +.BI \-paperw " size" +Set the paper width, in points. +.TP +.BI \-paperh " size" +Set the paper height, in points. +.TP +.B \-origpagesizes +This option is the same as "\-paper match". +.TP +.B \-nocrop +By default, output is cropped to the CropBox specified in the PDF +file. This option disables cropping. +.TP +.B \-expand +Expand PDF pages smaller than the paper to fill the paper. By +default, these pages are not scaled. +.TP +.B \-noshrink +Don't scale PDF pages which are larger than the paper. By default, +pages larger than the paper are shrunk to fit. +.TP +.B \-nocenter +By default, PDF pages smaller than the paper (after any scaling) are +centered on the paper. This option causes them to be aligned to the +lower-left corner of the paper instead. +.TP +.B \-duplex +Set the Duplex pagedevice entry in the PostScript file. This tells +duplex-capable printers to enable duplexing. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-overprint +Enable overprinting. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. 
+.SH AUTHOR +The pdftops software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftotext (1) +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdftotext.1 b/lib/poppler-0.68.0/share/man/man1/pdftotext.1 new file mode 100644 index 0000000000000000000000000000000000000000..b42aaa54b3bac263ae82a31b1f44eb3d483c7cef --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdftotext.1 @@ -0,0 +1,144 @@ +.\" Copyright 1997-2011 Glyph & Cog, LLC +.TH pdftotext 1 "15 August 2011" +.SH NAME +pdftotext \- Portable Document Format (PDF) to text converter +(version 3.03) +.SH SYNOPSIS +.B pdftotext +[options] +.RI [ PDF-file +.RI [ text-file ]] +.SH DESCRIPTION +.B Pdftotext +converts Portable Document Format (PDF) files to plain text. +.PP +Pdftotext reads the PDF file, +.IR PDF-file , +and writes a text file, +.IR text-file . +If +.I text-file +is not specified, pdftotext converts +.I file.pdf +to +.IR file.txt . +If +.I text-file +is \'-', the text is sent to stdout. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.BI \-r " number" +Specifies the resolution, in DPI. The default is 72 DPI. +.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (default is 0) +.TP +.B \-layout +Maintain (as best as possible) the original physical layout of the +text. The default is to \'undo' physical layout (columns, +hyphenation, etc.) and output the text in reading order. 
+.TP +.BI \-fixed " number" +Assume fixed-pitch (or tabular) text, with the specified character +width (in points). This forces physical layout mode. +.TP +.B \-raw +Keep the text in content stream order. This is a hack which often +"undoes" column formatting, etc. Use of raw mode is no longer +recommended. +.TP +.B \-htmlmeta +Generate a simple HTML file, including the meta information. This +simply wraps the text in
<pre> and </pre>
and prepends the meta +headers. +.TP +.B \-bbox +Generate an XHTML file containing bounding box information for each +word in the file. +.TP +.B \-bbox-layout +Generate an XHTML file containing bounding box information for each +block, line, and word in the file. +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. This defaults to "UTF-8". +.TP +.B \-listenc +Lists the available encodings. +.TP +.BI \-eol " unix | dos | mac" +Sets the end-of-line convention to use for text output. +.TP +.B \-nopgbrk +Don't insert page breaks (form feed characters) between pages. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH BUGS +Some PDF files contain fonts whose encodings have been mangled beyond +recognition. There is no way (short of OCR) to extract text from +these files. +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftotext software and documentation are copyright 1996-2011 Glyph +& Cog, LLC. 
+.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdfseparate (1), +.BR pdfsig (1), +.BR pdfunite (1) diff --git a/lib/poppler-0.68.0/share/man/man1/pdfunite.1 b/lib/poppler-0.68.0/share/man/man1/pdfunite.1 new file mode 100644 index 0000000000000000000000000000000000000000..4a1b4ea89bc114bb4b6f0f69d4d47b553389ee71 --- /dev/null +++ b/lib/poppler-0.68.0/share/man/man1/pdfunite.1 @@ -0,0 +1,43 @@ +.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org +.TH pdfunite 1 "15 September 2011" +.SH NAME +pdfunite \- Portable Document Format (PDF) page merger +.SH SYNOPSIS +.B pdfunite +[options] +.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile +.SH DESCRIPTION +.B pdfunite +merges several PDF (Portable Document Format) files in order of their occurrence on the command line into one PDF result file. +.TP +None of the PDF-sourcefile1 to PDF-sourcefilen should be encrypted. +.SH OPTIONS +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXAMPLE +pdfunite sample1.pdf sample2.pdf sample.pdf +.TP +merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf +.SH AUTHOR +The pdfunite software and documentation are copyright 1996-2004 Glyph & Cog, LLC +and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfseparate (1), +.BR pdfsig (1)