/* Copyright (c) MediaArea.net SARL. All Rights Reserved. * * Use of this source code is governed by a BSD-style license that can * be found in the License.html file in the root of the source tree. */ //--------------------------------------------------------------------------- // Pre-compilation #include "MediaInfo/PreComp.h" #ifdef __BORLANDC__ #pragma hdrstop #endif //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- #include "MediaInfo/Setup.h" //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- #if defined(MEDIAINFO_PDF_YES) //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- #include "MediaInfo/Text/File_Pdf.h" #include "MediaInfo/Tag/File_Xmp.h" #include #include using namespace std; //--------------------------------------------------------------------------- namespace MediaInfoLib { //*************************************************************************** // Constructor/Destructor //*************************************************************************** //--------------------------------------------------------------------------- File_Pdf::File_Pdf() :File__Analyze() { } //*************************************************************************** // Streams management //*************************************************************************** //--------------------------------------------------------------------------- void File_Pdf::Streams_Accept() { Fill(Stream_General, 0, General_Format, "PDF"); Stream_Prepare(Stream_Text); Fill(Stream_Text, 0, "Format", "PDF"); } //*************************************************************************** // Buffer - File header 
//***************************************************************************

//---------------------------------------------------------------------------
// Checks the file signature.
// Returns false without deciding when fewer than 5 bytes are buffered.
// Rejects the file (and returns false) when the first 5 bytes are not
// "%PDF-". Otherwise accepts the file, resets the per-file parser state
// (Catalog_Level, Offsets_Max, Objects_Current) and returns true.
bool File_Pdf::FileHeader_Begin()
{
    //Synchro
    if (5>Buffer_Size)
        return false;
    if (Buffer[0]!=0x25 //"%PDF-"
     || Buffer[1]!=0x50
     || Buffer[2]!=0x44
     || Buffer[3]!=0x46
     || Buffer[4]!=0x2D)
    {
        Reject();
        return false;
    }

    Accept();

    //Temp
    Catalog_Level=0;
    Offsets_Max=0;
    Objects_Current=Objects.end();

    //All should be OK...
    return true;
}

//---------------------------------------------------------------------------
// Parses the header line and the comment lines that follow it.
// The first line is read as the header; each following line whose first
// byte is '%' is skipped as a comment. If the buffer is exhausted while
// looking for the next line, the function asks for more data and returns.
// The text after the first 5 characters of the header line is stored as
// General_Format_Version, then parsing jumps near the end of the file and
// the state switches to State_Parsing_startxref.
void File_Pdf::FileHeader_Parse()
{
    string PdfHeader;
    Get_String(SizeOfLine(), PdfHeader,                         "Header");
    for (;;)
    {
        // Size is computed before the bounds check below; it is only used
        // when the next line really starts with '%'
        int64u CommentSize=SizeOfLine();
        if (Buffer_Offset+Element_Offset>=Buffer_Size)
        {
            Element_WaitForMoreData();
            return;
        }
        if (Buffer[Buffer_Offset+Element_Offset]!='%')
            break;
        Skip_String(CommentSize,                                "Comment");
    }

    //Filling
    Fill(Stream_General, 0, General_Format_Version, PdfHeader.substr(5));

    GoToFromEnd(9+2+10+2+5+2); // "startxref" + EOL + 10max digits + EOL + "%%EOF" + EOL
    State=State_Parsing_startxref;
}

//***************************************************************************
// Buffer - Global
//***************************************************************************

//---------------------------------------------------------------------------
// Dispatches on the current parser state:
//  - State_Parsing_xref      : parses the cross-reference table, then the
//                              trailer unless xref() asked for more data
//  - State_Parsing_startxref : runs eof() then startxref()
//  - State_Parsing_object    : nothing here, objects go through the
//                              Header_Parse()/Data_Parse() element functions
//  - any other state         : finishes the parsing
void File_Pdf::Read_Buffer_Continue()
{
    switch (State)
    {
        case State_Parsing_xref         : xref(); if (!Element_IsWaitingForMoreData()) trailer(); break;
        case State_Parsing_startxref    : eof(); startxref(); break;
        case State_Parsing_object       : break; //Using elements
        default                         : Finish();
    }
}

//***************************************************************************
// Buffer - Per element
//***************************************************************************

//---------------------------------------------------------------------------
// Always returns true. The previous logic (waiting until the next object
// offset is inside the buffer) is kept below as commented-out code; an
// equivalent check is done in Header_Parse().
bool File_Pdf::Header_Begin()
{
    //Offsets_Current=Offsets.find(Objects_Current->second.Offset);
    //offsets::iterator Offsets_Next=Offsets_Current;
    //Offsets_Next++;
    //if (Offsets_Next!=Offsets.end() && Offsets_Next->first>File_Offset+Buffer_Size)
    //{
    //    Element_WaitForMoreData();
    //    return false;
    //}

    return true;
}

//---------------------------------------------------------------------------
// Computes the size of the current element (one object).
// Looks up the first recorded offset that is greater than the current
// absolute position; the element ends there, or at Offsets_Max when no such
// offset exists. If that next offset lies beyond the buffered data, more
// data is requested and no size is set.
// NOTE(review): upper_bound() requires Offsets to be sorted; the sort is not
// visible in this excerpt - confirm where it happens.
// NOTE(review): the current position is cast to int32u for the lookup, so
// positions above 4 GiB would be truncated - confirm this is intended.
void File_Pdf::Header_Parse()
{
    offsets::iterator Offsets_Next=upper_bound(Offsets.begin(), Offsets.end(), (int32u)(File_Offset+Buffer_Offset));
    if (Offsets_Next!=Offsets.end() && *Offsets_Next>File_Offset+Buffer_Size)
    {
        Element_WaitForMoreData();
        return;
    }

    int64u Size;
    //if (Offsets_Current==Offsets.end())
    //    Size=Offsets_Max-(File_Offset+Buffer_Offset);
    //else
    //    Size=Offsets_Current->first-(File_Offset+Buffer_Offset);
    if (Offsets_Next==Offsets.end())
        Size=Offsets_Max-(File_Offset+Buffer_Offset);
    else
        Size=*Offsets_Next-(File_Offset+Buffer_Offset);

    Header_Fill_Size(Size);
}

//---------------------------------------------------------------------------
// Parses one object, then moves to the next object to visit.
// The object number is the text before the first space of the first line.
// Objects registered as Root, Info or Metadata are handed to the matching
// Object_*() parser; unknown or other objects are skipped.
// Afterwards the object tree is walked: the current object's BottomPos is
// advanced; when its Bottoms list is exhausted the walk goes up to
// TopObject, and when the exhausted object is the one keyed (int32u)-1 the
// parsing is finished. Otherwise parsing jumps to the offset of the next
// bottom object.
void File_Pdf::Data_Parse()
{
    Element_Name("Object");

    string Line;
    Get_String(SizeOfLine(), Line,                              "Header");

    size_t Space_Pos=Line.find(' ');
    int32u ObjectNumber=Ztring().From_UTF8(Line.substr(0, Space_Pos)).To_int32u();
    Element_Info1(ObjectNumber);
    objects::iterator Object=Objects.find(ObjectNumber);
    if (Object==Objects.end())
        Skip_XX(Element_Size-Element_Offset,                    "Data");
    else
        switch(Object->second.Type)
        {
            case Type_Root      : Object_Root(); break;
            case Type_Info      : Object_Info(); break;
            case Type_Metadata  : Object_Metadata(); break;
            default             : Skip_XX(Element_Size-Element_Offset, "Data");
        }

    for (;;)
    {
        if (Objects_Current==Objects.end())
            break;

        Objects_Current->second.BottomPos++;
        if (Objects_Current->second.BottomPos>=Objects_Current->second.Bottoms.size())
        {
            if (Objects_Current->first==(int32u)-1)
            {
                //No more to parse
                Objects_Current=Objects.end();
                Objects.clear();
                Finish();
                break;
            }

            // Going up one level; the end() test at the top of the loop
            // covers a TopObject that is not in the map
            Objects_Current=Objects.find(Objects_Current->second.TopObject);
            continue;
        }

        // NOTE(review): the result of find() is dereferenced without an
        // end() check - verify that Bottoms only holds known object numbers
        Objects_Current=Objects.find(Objects_Current->second.Bottoms[Objects_Current->second.BottomPos]);
        GoTo(Objects_Current->second.Offset);
        break;
    }
}

//***************************************************************************
// Elements
//***************************************************************************

//---------------------------------------------------------------------------
// Parses a cross-reference section: expects a line containing "xref", then
// a subsection header of the form "<base> <count>" (both read with atoi).
// When the first line is not "xref" the remaining data is skipped.
// At the end, Offsets_Max is raised to the current absolute position if it
// is smaller.
// NOTE(review): the middle of this function is damaged in this copy of the
// file (text between '<' and '>' appears to have been stripped: the "while"
// condition below does not parse and "Pos" is never declared). The tokens
// are kept exactly as found; restore this span from the upstream source.
void File_Pdf::xref()
{
    //Parsing
    Element_Begin1("Cross-Reference Table");
    Element_Begin1("Cross-Reference Section");

    string FirstLine;
    Get_String(SizeOfLine(), FirstLine,                         "Object name");
    if (FirstLine!="xref")
    {
        //Problem
        Skip_XX(Element_Size-Element_Offset,                    "(Problem)");
        Element_End0();
        Element_End0();
        return;
    }

    Element_Begin1("Cross-Reference SubSection");
    Get_String(SizeOfLine(), FirstLine,                         "Header");
    size_t FirstLine_Space=FirstLine.find(' ');
    int32u Base=atoi((const char*)FirstLine.c_str());
    int32u Count=0;
    if (FirstLine_Space!=string::npos)
        Count=atoi((const char*)FirstLine.c_str()+FirstLine_Space+1);

    // NOTE(review): damaged span starts here (see function comment)
    while (Element_Offset(Element_Size-Element_Offset)/20)
    {
        if (File_Offset+Buffer_Size100)
            Element_Offset+=20;
        else
        {
            Skip_String(18,                                     "Entry"); Param_Info1(Base+Pos);
            Element_Offset+=2; //Skipping spaces at end and line return
        }
    }
    // NOTE(review): damaged span ends here

    Element_End0();
    Element_End0();
    Element_End0();

    if (File_Offset+Buffer_Offset>Offsets_Max)
        Offsets_Max=(int32u)(File_Offset+Buffer_Offset);
}

//---------------------------------------------------------------------------
// Parses the trailer: expects a line containing "trailer" (the remaining
// data is skipped when it is something else), skips the following line,
// and ends by jumping to the offset of the object Objects_Current points to
// and switching the state to State_Parsing_object.
// NOTE(review): the key/value loop of this function is damaged in this copy
// of the file (the "while" statement below is a fusion of two unrelated
// fragments). The tokens are kept exactly as found; restore this span from
// the upstream source.
void File_Pdf::trailer()
{
    Element_Begin1("Trailer");

    //Parsing
    int32u Prev=(int32u)-1;
    string Key;
    Ztring Value;
    Get_String(SizeOfLine(), Key,                               "Object name");
    if (Key!="trailer")
    {
        //Problem
        Skip_XX(Element_Size-Element_Offset,                    "(Problem)");
    }
    Skip_String(SizeOfLine(),                                   "Object name");
    // NOTE(review): damaged statement (see function comment)
    while (Element_Offsetsecond.Bottoms[0]);
    GoTo(Objects_Current->second.Offset);
    State=State_Parsing_object;
}

//---------------------------------------------------------------------------
// Locates and parses the "startxref" block at the end of the buffer.
// Buffer_Offset is walked backwards from the last buffered byte: trailing
// end-of-line bytes, the 5 bytes of "%%EOF", end-of-line bytes, the decimal
// digits of the offset value, end-of-line bytes, then 8 more bytes
// (presumably the rest of the "startxref" keyword - confirm). The keyword
// line is then skipped and the offset line is read as a string.
// The function ends by jumping to xref_Offset and switching the state to
// State_Parsing_xref.
// NOTE(review): the fixed "-=5" and "-=8" steps are not guarded against
// Buffer_Offset being smaller than the step - verify that the buffer is
// always long enough here.
// NOTE(review): the end of this function is damaged in this copy of the
// file (the "while" below is a fusion of two fragments and the declaration
// of xref_Offset is missing). The tokens are kept exactly as found; restore
// this span from the upstream source.
void File_Pdf::startxref()
{
    //We need to find the exact begin
    Buffer_Offset=Buffer_Size-1;
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
    Buffer_Offset-=5; // "%%EOF"
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
    while (Buffer_Offset && Buffer[Buffer_Offset]>='0' && Buffer[Buffer_Offset]<='9') // Value
        Buffer_Offset--;
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
    Buffer_Offset-=8;

    //Parsing
    Element_Begin1("Cross-Reference Table Offset");
    string xrefOffsetS;
    Skip_String(SizeOfLine(),                                   "Object name");
    Get_String (SizeOfLine(), xrefOffsetS,                      "xref Offset");
    // NOTE(review): damaged statement (see function comment)
    while (Buffer_OffsetOffsets_Max)
        Offsets_Max=xref_Offset;

    GoTo (xref_Offset);
    State=State_Parsing_xref;
}

//---------------------------------------------------------------------------
// NOTE(review): this definition is damaged in this copy of the file. The
// "if" condition is cut after "File_Offset+Buffer_Size" and runs straight
// into what appears to be the tail of another function (it records a
// Metadata object number in the current object's Bottoms list); the braces
// do not balance. The tokens are kept exactly as found; restore eof() and
// the function that followed it from the upstream source.
void File_Pdf::eof()
{
    if (File_Size!=(int64u)-1 && File_Offset+Buffer_Sizefirst;
    Objects[Objects_Current->first].Bottoms.push_back(ObjectNumber);
    Param_Info1(__T("Metadata is at offset 0x"+Ztring().From_Number(Objects[ObjectNumber].Offset)));
    }
    }

    continue;
    }

    if (Key.empty())
        break;
    }
}

//---------------------------------------------------------------------------
// NOTE(review): this definition is damaged in this copy of the file. After
// the local declarations, the "while" condition is cut and runs into the
// tail of a different function that returns a length computed as
// "End-(Buffer_Offset+(size_t)Element_Offset)". The tokens are kept exactly
// as found; restore Object_Info() and the functions that followed it from
// the upstream source.
void File_Pdf::Object_Info()
{
    Element_Info1("Info");

    //Parsing
    string Key;
    Ztring Value;
    while (Element_Offset'))
    while (Element_Offset' && Buffer[End+1]=='>'))
        End++;

    return End-(Buffer_Offset+(size_t)Element_Offset);
}

//---------------------------------------------------------------------------
// Reads the next token of an object into Key (and Value).
// Visible behavior: Key and Value are cleared first. A "<<" token raises
// Catalog_Level and a ">>" token lowers it, both returning true. Otherwise
// a full line is peeked and cut at the first ">>" if there is one; the
// lines "stream", "endstream" and "endobj" are consumed, copied into Key,
// and make the function return false.
// NOTE(review): the end-of-line skipping loop at the top is damaged in this
// copy of the file (its condition is fused with the following bounds
// check). The tokens are kept exactly as found.
// NOTE(review): this definition continues past the end of this excerpt; the
// key/value extraction itself is not visible here.
bool File_Pdf::Get_Next(string &Key, Ztring &Value)
{
    Key.clear();
    Value.clear();

    string Line;

    //Removing end of lines
    // NOTE(review): damaged statement (see function comment)
    while (Element_Offset=Element_Size)
        return true;

    //Testing Catalog
    Peek_String (2, Line);
    if (Line=="<<")
    {
        Element_Offset+=2;
        Catalog_Level++;
        return true;
    }
    else if (Line==">>")
    {
        Element_Offset+=2;
        Catalog_Level--;
        return true;
    }

    //Getting a complete line
    Peek_String (SizeOfLine(), Line);

    //Testing Catalog
    size_t Catalog_End=Line.find(">>");
    if (Catalog_End!=String::npos)
        Line.resize(Catalog_End);

    //Testing stream
    if (Line=="stream")
    {
        Skip_String(Line.size(),                                "Stream, Header");
        Key=Line;
        return false;
    }
    if (Line=="endstream")
    {
        Skip_String(Line.size(),                                "Stream, Footer");
        Key=Line;
        return false;
    }

    //Testing object
    if (Line=="endobj")
    {
        Skip_String(Line.size(),                                "Footer");
        Key=Line;
        return false;
    }

    //Base
    int64u Line_Base=Element_Offset;

    //Testing next key
    size_t Line_End=0;
    size_t Line_Begin=Line_End;

    // Key-Value
    if (Line_Begin