btparser/cparser/tests/exp_lex/MOBITemplate.bt

714 lines
17 KiB
Plaintext
Raw Normal View History

2016-06-05 21:47:15 +08:00
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13: typedef struct {
14: DWORD dataOffset ;
15: UBYTE attributeBits ;
16: UBYTE uid1 ;
17: WORD uid2 ;
18: } ri ;
19:
20: void checkpdfheader ( )
21: {
22:
23: if ( ( type != "BOOK" || creator != "MOBI" )
24: && ( type != "TEXt" || creator != "REAd" ) )
25: {
26: Warning ( "File is not BOOKMOBI or TEXtREAd. Template stopped." ) ;
27: return - 1 ;
28: }
29: }
30:
31: typedef struct {
32: CHAR name [ 32 ] ;
33: WORD attributeBits ;
34: WORD version ;
35: DWORD creationDate ;
36: DWORD modificationDate ;
37: DWORD lastBackupDate ;
38: DWORD modificationNumber ;
39: DWORD appInfoID ;
40: DWORD sortInfoID ;
41: CHAR type [ 4 ] ;
42: CHAR creator [ 4 ] ;
43: checkpdfheader ( ) ;
44: DWORD uniqueIDseed ;
45: DWORD nextRecordListID ;
46: WORD numberOfRecords ;
47:
48: SetBackColor ( cWhite ) ;
49: struct {
50: DWORD dataOffset ;
51: UBYTE attributeBits ;
52: UBYTE uid1 ;
53: WORD uid2 ;
54: } recptr [ pdf . numberOfRecords ] ;
55: } PalmDatabaseFormat ;
56:
57: enum < ushort > eCompression {
58: NoCompression = 1 ,
59: PalmDOC = 2 ,
60: HUFFCDIC = 17480
61: } ;
62:
63: enum < ushort > encryptionType {
64: NoEncryption = 0 ,
65: OldMobipocketEncryption = 1 ,
66: MobipocketEncryption = 2
67: } ;
68:
69: enum < ushort > TextEncoding {
70: CP1252_WinLatin1 = 1252 ,
71: UTF8 = 65001
72: } ;
73:
74: enum < uint > MobiType {
75: MobipocketBook = 2 ,
76: PalmDOCbook = 3 ,
77: Audio = 4 ,
78: News = 257 ,
79: NewsFeed = 258 ,
80: NewsMagazine = 259 ,
81: PICS = 513 ,
82: Word = 514 ,
83: XLS = 515 ,
84: PPT = 516 ,
85: TEXT = 517 ,
86: HTML = 518
87: } ;
88:
89:
90: typedef struct {
91: eCompression compression ;
92: WORD unused ;
93: DWORD textLength ;
94: WORD numPDBrecords ;
95: WORD recMaxSize ;
96: encryptionType encryption ;
97: Assert ( encryption == 0 , "File encrypted. Abort." ) ;
98: WORD unknown1 ;
99: } PalmDOCheader ;
100:
101: typedef enum < uint32 > eMBHflags {
102: multibyte = 0x1 ,
103: trailers = 0x2
104: } MBHflags < read = readMBHflags > ;
105:
106: string readMBHflags ( local MBHflags & flags ) {
107: local string s = "" ;
108: local int commaNeeded = 0 ;
109: local MBHflags i = 1 ;
110:
111: SPrintf ( s , "%x: " , flags ) ;
112:
113:
114:
115:
116:
117: while ( i <= 2 ) {
118: if ( flags && i ) {
119: if ( commaNeeded ) { s += ", " ; }
120: s += EnumToString ( i ) ;
121: commaNeeded = 1 ;
122: }
123: i = i << 1 ;
124: }
125: return s ;
126: }
127:
128:
129: typedef struct {
130: CHAR identifier [ 4 ] < comment = "continuation of Palm DOC Header" > ;
131: DWORD headerLength ;
132: MobiType mobiType ;
133: WORD cryptoType ;
134: TextEncoding textEncoding < comment = "codepage" > ;
135: DWORD uniqueID ;
136: DWORD MOBIversion ;
137: DWORD orthographicIndex < format = hex > ;
138: DWORD inflectionIndex < format = hex > ;
139: DWORD indexNames < format = hex > ;
140: DWORD indexKeys < format = hex > ;
141: DWORD extraIndex0 < format = hex > ;
142: DWORD extraIndex1 < format = hex > ;
143: DWORD extraIndex2 < format = hex > ;
144: DWORD extraIndex3 < format = hex > ;
145: DWORD extraIndex4 < format = hex > ;
146: DWORD extraIndex5 < format = hex > ;
147: DWORD firstNonBookIndex < format = hex > ;
148: DWORD fullNameOffset < comment = "Book Title" > ;
149: DWORD fullNameLength < comment = "Book Title Length" > ;
150: DWORD locale ;
151: DWORD inputLanguage ;
152: DWORD outputLanguage ;
153: DWORD minVersion ;
154: DWORD firstImageIndex ;
155: DWORD huffmanRecordOffset ;
156: DWORD huffmanRecordCount ;
157: DWORD huffmanTableOffset ;
158: DWORD huffmanTableLength ;
159: DWORD EXTHflags ;
160: CHAR unknown2 [ 32 ] ;
161: DWORD DRMoffset ;
162: DWORD DRMcount < format = hex > ;
163: DWORD DRMsize < format = hex > ;
164: DWORD DRMflags ;
165: CHAR unknown3 [ 12 ] ;
166: WORD FirstContentRecNo ;
167: WORD LastContentRecNo ;
168: DWORD unknown4 ;
169: DWORD FCISrecNo ;
170: DWORD unknown5 ;
171: DWORD FLISrecNo ;
172: DWORD unknown6 ;
173: QWORD unknown7 ;
174: DWORD unknown8 ;
175: DWORD unknown9 ;
176: DWORD unknown10 ;
177: DWORD unknown11 ;
178: MBHflags mbhflags ;
179: DWORD INDXrecordOffset ;
180: } MobiHeader ;
181:
182:
183: enum < ubyte > TextToSpeach {
184: TextToSpeechEnabled = 0 ,
185: TextToSpeechDisabled = 1
186: } ;
187:
188: enum < DWORD > CreatorSoftware {
189: mobigen = 1 ,
190: MobipocketCreator = 2 ,
191: kindlegen_Windows = 200 ,
192: kindlegen_Linux = 201 ,
193: kindlegen_Mac = 202
194: } ;
195:
196: typedef struct {
197: enum < DWORD > EXTHrecordType {
198: Drm_Server_Id = 1 ,
199: Drm_Commerce_Id = 2 ,
200: Drm_Ebookbase_Book_Id = 3 ,
201: Creator = 100 ,
202: Publisher = 101 ,
203: Imprint = 102 ,
204: Description = 103 ,
205: ISBN = 104 ,
206: Subject = 105 ,
207: Published = 106 ,
208: Review = 107 ,
209: Contributor = 108 ,
210: Rights = 109 ,
211: SubjectCode = 110 ,
212: Type = 111 ,
213: Source = 112 ,
214: ASIN = 113 ,
215: VersionNumber = 114 ,
216: Sample = 115 ,
217: StartReading = 116 ,
218: Adult = 117 ,
219: Price = 118 ,
220: Currency = 119 ,
221: K8_Boundary_Section = 121 ,
222: fixed_layout = 122 ,
223: book_type = 123 ,
224: orientation_lock = 124 ,
225: K8_Count_of_Resources_Fonts_Images = 125 ,
226: original_resolution = 126 ,
227: K8_Cover_Image = 129 ,
228: K8_Unidentified_Count = 131 ,
229: RegionMagnification = 132 ,
230: DictShortName = 200 ,
231: CoverOffset = 201 ,
232: ThumbOffset = 202 ,
233: HasFakeCover = 203 ,
234: CreatorSoftwareRecord = 204 ,
235: CreatorMajorVersion = 205 ,
236: CreatorMinorVersion = 206 ,
237: CreatorBuildNumber = 207 ,
238: Watermark = 208 ,
239: Tamper_proof_keys = 209 ,
240: FontSignature = 300 ,
241: ClippingLimit = 401 ,
242: PublisherLimit = 402 ,
243: TextToSpeachFlag = 404 ,
244: CDE_Type = 501 ,
245: last_update_time = 502 ,
246: Updated_Title = 503
247:
248:
249:
250:
251:
252:
253:
254: } recordType ;
255: DWORD recordLength ;
256: switch ( recordType ) {
257: case 1 :
258: UBYTE Drm_Server_Id [ recordLength - 8 ] ;
259: break ;
260: case 2 :
261: UBYTE Drm_Commerce_Id [ recordLength - 8 ] ;
262: break ;
263: case 3 :
264: UBYTE Drm_Ebookbase_Book_Id [ recordLength - 8 ] ;
265: break ;
266: case 100 :
267: UBYTE creator [ recordLength - 8 ] ;
268: break ;
269: case 101 :
270: UBYTE publisher [ recordLength - 8 ] ;
271: break ;
272: case 102 :
273: UBYTE imprint [ recordLength - 8 ] ;
274: break ;
275: case 103 :
276: UBYTE description [ recordLength - 8 ] ;
277: break ;
278: case 104 :
279: UBYTE ISBN [ recordLength - 8 ] ;
280: break ;
281: case 105 :
282: UBYTE subject [ recordLength - 8 ] ;
283: break ;
284: case 106 :
285: UBYTE publishingDate [ recordLength - 8 ] ;
286: break ;
287: case 107 :
288: UBYTE review [ recordLength - 8 ] ;
289: break ;
290: case 108 :
291: UBYTE contributor [ recordLength - 8 ] ;
292: break ;
293: case 109 :
294: UBYTE rights [ recordLength - 8 ] ;
295: break ;
296: case 110 :
297: UBYTE subjectCode [ recordLength - 8 ] ;
298: break ;
299: case 111 :
300: UBYTE type [ recordLength - 8 ] ;
301: break ;
302: case 112 :
303: UBYTE source [ recordLength - 8 ] ;
304: break ;
305: case 113 :
306: UBYTE ASIN [ recordLength - 8 ] ;
307: break ;
308: case 114 :
309: UBYTE versionNumber [ recordLength - 8 ] ;
310: break ;
311: case 115 :
312: Assert ( recordLength == 12 , "sample recordLength-8 != 4 (DWORD)." ) ;
313: DWORD sample ;
314: break ;
315: case 116 :
316: UBYTE startReading [ recordLength - 8 ] ;
317: break ;
318: case 117 :
319: UBYTE adult [ recordLength - 8 ] ;
320: break ;
321: case 118 :
322: UBYTE price [ recordLength - 8 ] ;
323: break ;
324: case 119 :
325: UBYTE currency [ recordLength - 8 ] ;
326: break ;
327: case 121 :
328: case 122 :
329: case 123 :
330: case 124 :
331: case 125 :
332: case 126 :
333: case 129 :
334: case 131 :
335: case 132 :
336: UBYTE unknown [ recordLength - 8 ] ;
337: break ;
338:
339: case 200 :
340: UBYTE dictShortName [ recordLength - 8 ] ;
341: break ;
342: case 201 :
343: Assert ( recordLength == 12 , "coverOffset recordLength-8 != 4 (DWORD)." ) ;
344: DWORD coverOffset ;
345: break ;
346: case 202 :
347: Assert ( recordLength == 12 , "thumbOffset recordLength-8 != 4 (DWORD)." ) ;
348: DWORD thumbOffset ;
349: break ;
350: case 203 :
351: UBYTE hasFakeCover [ recordLength - 8 ] ;
352: break ;
353: case 204 :
354: Assert ( recordLength == 12 , "creatorSoftware recordLength-8 != 4 (DWORD)." ) ;
355: CreatorSoftware creatorSoftware ;
356: break ;
357: case 205 :
358: Assert ( recordLength == 12 , "creatorMajorVersion recordLength-8 != 4 (DWORD)." ) ;
359: DWORD creatorMajorVersion ;
360: break ;
361: case 206 :
362: Assert ( recordLength == 12 , "creatorMinorVersion recordLength-8 != 4 (DWORD)." ) ;
363: DWORD creatorMinorVersion ;
364: break ;
365: case 207 :
366: Assert ( recordLength == 12 , "creatorBuildNumber recordLength-8 != 4. (DWORD)" ) ;
367: DWORD creatorBuildNumber ;
368: break ;
369: case 208 :
370: UBYTE watermark [ recordLength - 8 ] ;
371: break ;
372: case 209 :
373: UBYTE tamper_proof_keys [ recordLength - 8 ] ;
374: break ;
375: case 300 :
376: UBYTE fontSignature [ recordLength - 8 ] ;
377: break ;
378: case 301 :
379: UBYTE clippingLimit [ recordLength - 8 ] ;
380: break ;
381: case 402 :
382: UBYTE publisherLimit [ recordLength - 8 ] ;
383: break ;
384: case 404 :
385: Assert ( recordLength == 9 , "TextToSpeach recordLength-8 != 1 (BYTE)." ) ;
386: TextToSpeach textToSpeach ;
387: break ;
388: case 501 :
389: Assert ( recordLength == 12 , "CDEtype recordLength-8 != 1. (DWORD)" ) ;
390: CHAR CDEtype [ 4 ] ;
391: break ;
392: case 502 :
393: UBYTE lastUpdateTime [ recordLength - 8 ] ;
394: break ;
395: case 503 :
396: UBYTE updatedTitle [ recordLength - 8 ] ;
397: break ;
398: case 504 :
399: UBYTE ASINcopy [ recordLength - 8 ] ;
400: break ;
401: case 524 :
402: UBYTE dclanguage [ recordLength - 8 ] ;
403: break ;
404: default :
405: UBYTE unknown [ recordLength - 8 ] ;
406: break ;
407: }
408: } EXTHrecord ;
409:
410: typedef struct {
411: CHAR identifier [ 4 ] ;
412: DWORD headerLength ;
413: UINT recordCount ;
414: local int i = 0 ;
415: for ( i = 0 ; i < recordCount ; i ++ ) {
416: EXTHrecord exthrecord ;
417: }
418: } ExthHeader ;
419:
420: typedef struct {
421: UINT ID < comment = "FLIS" > ;
422: UINT fixed1 < comment = "fixed value 8" > ;
423: USHORT fixed2 < comment = "fixed value 65" > ;
424: USHORT fixed3 < comment = "fixed value 0" > ;
425: UINT fixed4 < comment = "fixed value 0" > ;
426: UINT fixed5 < comment = "fixed value -1" > ;
427: USHORT fixed6 < comment = "fixed value 1" > ;
428: USHORT fixed7 < comment = "fixed value 3" > ;
429: UINT fixed8 < comment = "fixed value 3" > ;
430: UINT fixed9 < comment = "fixed value 1" > ;
431: UINT fixed10 < comment = "fixed value -1" > ;
432: } FLISRECORD ;
433:
434:
435: typedef struct {
436: UINT ID < comment = "FDST" > ;
437: UINT FDSTstart < comment = "FDST start" > ;
438: UINT fdstcnt < comment = "Number of records inside FDST" > ;
439: struct {
440: UBYTE record [ reclen - 12 ] ;
441: } fdst ;
442: } FDSTkf8RECORD ;
443:
444: typedef struct {
445: UINT ID < comment = "FCIS" > ;
446: UINT fixed1 < comment = "fixed value 20" > ;
447: UINT fixed2 < comment = "fixed value 16" > ;
448: UINT fixed3 < comment = "fixed value 1" > ;
449: UINT fixed4 < comment = "fixed value 0" > ;
450: UINT fixed5 < comment = "text length (the same value as \"text length\" in the PalmDoc header)" > ;
451: UINT fixed6 < comment = "fixed value 0" > ;
452: UINT fixed7 < comment = "fixed value 32" > ;
453: UINT fixed8 < comment = "fixed value 8" > ;
454: USHORT fixed9 < comment = "fixed value 1" > ;
455: USHORT fixed10 < comment = "fixed value 1" > ;
456: UINT fixed11 < comment = "fixed value 0" > ;
457: } FCISRECORD ;
458:
459: typedef struct {
460: UINT ID < comment = "SRCS" > ;
461: struct {
462: UBYTE record [ reclen - 4 ] ;
463: } srcs ;
464: } SRCSRECORD ;
465:
466: typedef struct {
467: UINT ID < comment = "DATP" > ;
468: struct {
469: UBYTE record [ reclen - 4 ] ;
470: } datp ;
471: } DATPRECORD ;
472:
473: typedef struct {
474: QUAD ID < comment = "BOUNDARY" > ;
475: } BOUNDARYRECORD ;
476:
477: typedef struct {
478: struct {
479: UBYTE b [ reclen ] ;
480: } html ;
481: } HTML ;
482:
483: typedef struct {
484: UINT ID < comment = "INDX" > ;
485: UINT headerLength < comment = "" > ;
486: UINT indexType < comment = "" > ;
487: UINT unknown1 < comment = "" > ;
488: UINT unknown2 < comment = "" > ;
489: UINT idxtStart < comment = "offset to the IDXT section" > ;
490: UINT indexEncoding ;
491: UINT indexLanguage < comment = "language code of the index" > ;
492: UINT totalIndexCount < comment = "number of index entries" > ;
493: UINT ordtStart < comment = "offset to the ORDT section" > ;
494: UINT ligtStart < comment = "offset to the LIGT section" > ;
495: UINT unknown3 ;
496: UINT unknown4 ;
497: } INDXRECORD ;
498:
499: typedef struct {
500: UBYTE fixed1 < comment = "fixed value 233 (0xE9)" > ;
501: UBYTE fixed2 < comment = "fixed value 142 (0x8E)" > ;
502: UBYTE fixed3 < comment = "fixed value 13 (0x0D)" > ;
503: UBYTE fixed4 < comment = "fixed value 10 (0x0A)" > ;
504: } ENDRECORD ;
505:
506:
507:
508:
509:
510: BigEndian ( ) ;
511: SetBackColor ( cLtGray ) ;
512: PalmDatabaseFormat pdf ;
513:
514:
515: FSeek ( pdf . recptr [ 0 ] . dataOffset ) ;
516: SetBackColor ( cLtGray ) ;
517: PalmDOCheader pdh ;
518: MobiHeader mbh ;
519: local char fullName [ 255 ] ;
520: local char KF8fullName [ 255 ] ;
521:
522: local int reclen ;
523: local int i , n ;
524: local int endrec ;
525: local char tag [ 9 ] ;
526:
527: if ( mbh . fullNameOffset != 0 )
528: {
529:
530: FSeek ( pdf . recptr [ 0 ] . dataOffset + mbh . fullNameOffset ) ;
531: ReadBytes ( fullName , FTell ( ) , mbh . fullNameLength ) ;
532: fullName [ mbh . fullNameLength ] = '\0' ;
533:
534: }
535:
536: if ( mbh . EXTHflags & 0x40 )
537: {
538:
539: FSeek ( pdf . recptr [ 0 ] . dataOffset + 16 + mbh . headerLength ) ;
540: SetBackColor ( cYellow ) ;
541: ExthHeader exth ;
542: }
543:
544: local uint multibyte = 0 ;
545: local uint trailers = 0 ;
546: if ( pdf . type == "BOOK" && pdf . creator == "MOBI" )
547: {
548: if ( ( mbh . headerLength >= 0xE4 ) && ( pdf . version >= 5 ) )
549: {
550: multibyte = flags & 1 ;
551: while ( flags > 1 )
552: {
553: if ( flags & 2 )
554: {
555: ++ trailers ;
556: flags = flags >> 1 ;
557: }
558: }
559: }
560: }
561:
562:
563: for ( i = 0 ; i < pdf . numberOfRecords - 1 ; i ++ )
564: {
565: FSeek ( pdf . recptr [ i ] . dataOffset ) ;
566: reclen = ( pdf . recptr [ i + 1 ] . dataOffset - pdf . recptr [ i ] . dataOffset ) ;
567:
568: ReadBytes ( tag , FTell ( ) , 8 ) ;
569: tag [ 8 ] = '\0' ;
570:
571:
572:
573: if ( Memcmp ( tag , "FLIS" , 4 ) == 0 )
574: {
575: SetBackColor ( cLtGray ) ;
576: FLISRECORD data ;
577: }
578: else if ( Memcmp ( tag , "FDST" , 4 ) == 0 )
579: {
580: SetBackColor ( cLtGray ) ;
581: FDSTkf8RECORD data ;
582: }
583: else if ( Memcmp ( tag , "FCIS" , 4 ) == 0 )
584: {
585: SetBackColor ( cLtGreen ) ;
586: FCISRECORD data ;
587: if ( data . fixed1 < 0x10 )
588: {
589:
590: }
591:
592:
593: }
594: else if ( Memcmp ( tag , "SRCS" , 4 ) == 0 )
595: {
596: SetBackColor ( cLtRed ) ;
597: SRCSRECORD data ;
598: }
599: else if ( Memcmp ( tag , "DATP" , 4 ) == 0 )
600: {
601: SetBackColor ( cLtBlue ) ;
602: DATPRECORD data ;
603: for ( n = 0 ; n < reclen - 4 ; n ++ )
604: {
605: if ( data . datp . record [ n ] < 0x10 )
606: {
607:
608: }
609:
610: }
611:
612: }
613: else if ( Memcmp ( tag , "INDX" , 4 ) == 0 )
614: {
615: SetBackColor ( cSilver ) ;
616: INDXRECORD data ;
617: }
618: else if ( Memcmp ( tag , "BOUNDARY" , 8 ) == 0 )
619: {
620: SetBackColor ( cYellow ) ;
621: BOUNDARYRECORD data ;
622:
623: SetBackColor ( cLtGray ) ;
624: i ++ ;
625: PalmDOCheader data ;
626: MobiHeader KF8mbh ;
627: if ( KF8mbh . fullNameOffset != 0 )
628: {
629:
630: FSeek ( pdf . recptr [ i ] . dataOffset + KF8mbh . fullNameOffset ) ;
631: ReadBytes ( KF8fullName , FTell ( ) , KF8mbh . fullNameLength ) ;
632: fullName [ KF8mbh . fullNameLength ] = '\0' ;
633:
634: }
635: if ( KF8mbh . EXTHflags & 0x40 )
636: {
637:
638:
639: FSeek ( pdf . recptr [ i ] . dataOffset + 16 + KF8mbh . headerLength ) ;
640: SetBackColor ( cYellow ) ;
641: ExthHeader KF8exth ;
642: }
643: }
644: else if ( Memcmp ( tag , "<html>" , 6 ) == 0 )
645: {
646: SetBackColor ( cLtGreen ) ;
647: HTML data ;
648: }
649: else if ( Memcmp ( tag , "<!DOCTYP" , 8 ) == 0 )
650: {
651: SetBackColor ( cLtGreen ) ;
652: HTML data ;
653: }
654: else
655: {
656: SetBackColor ( cLtGray ) ;
657: struct {
658: UBYTE unknown [ reclen ] ;
659: } data ;
660: }
661: }
662:
663: endrec = ReadUInt ( FTell ( ) ) ;
664:
665: if ( endrec == 0xE98E0D0A )
666: {
667: SetBackColor ( cLtGreen ) ;
668: ENDRECORD data ;
669: }
670:
671:
672:
673:
674:
675:
676:
677:
678:
679:
680:
681:
682:
683:
684:
685:
686:
687:
688:
689:
690:
691:
692:
693:
694:
695:
696:
697:
698:
699:
700:
701:
702:
703:
704:
705:
706:
707:
708:
709:
710:
711:
712:
713:
714: return ; tok_eof