xref: /plan9/sys/src/cmd/aux/antiword/wordtypes.h (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * wordtypes.h
3  * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
4  *
5  * Description:
6  * Typedefs for the interpretation of MS Word files
7  */
8 
9 #if !defined(__wordtypes_h)
10 #define __wordtypes_h 1
11 
#include <stdio.h>
#include <time.h>
#if defined(__riscos)
#include "DeskLib:Font.h"
#include "DeskLib:Wimp.h"
#endif /* __riscos */
17 
/* Short names for the unsigned integer types used throughout this header */
typedef unsigned char UCHAR;
typedef unsigned short USHORT;
typedef unsigned int UINT;
typedef unsigned long ULONG;
22 
23 #if defined(__riscos)
24 typedef struct diagram_tag {
25 	drawfile_info	tInfo;
26 	window_handle	tMainWindow;
27 	window_handle	tScaleWindow;
28 	menu_ptr	pSaveMenu;
29 	long		lXleft;			/* In DrawUnits */
30 	long		lYtop;			/* In DrawUnits */
31 	size_t		tMemorySize;
32 	int		iScaleFactorCurr;	/* In percentage */
33 	int		iScaleFactorTemp;	/* In percentage */
34 	char		szFilename[19+1];
35 } diagram_type;
36 #else
/*
 * The diagram_type used when __riscos is not defined.
 * FILE is declared by <stdio.h>, which this header includes so that
 * the struct does not depend on the includer's include order.
 */
typedef struct diagram_tag {
	FILE	*pOutFile;
	/* Both coordinates are in DrawUnits */
	long	lXleft;
	long	lYtop;
} diagram_type;
42 typedef UCHAR		drawfile_fontref;
43 #endif /* __riscos */
44 
45 typedef struct output_tag {
46 	char	*szStorage;
47 	long	lStringWidth;		/* In millipoints */
48 	size_t	tStorageSize;
49 	size_t	tNextFree;
50 	USHORT	usFontStyle;
51 	USHORT	usFontSize;
52 	UCHAR	ucFontColor;
53 	drawfile_fontref	tFontRef;
54 	struct output_tag	*pPrev;
55 	struct output_tag	*pNext;
56 } output_type;
57 
/*
 * Types of conversion.
 * The enumerators are numbered consecutively from 0.
 */
typedef enum conversion_tag {
	conversion_unknown	= 0,
	conversion_text		= 1,
	conversion_draw		= 2,
	conversion_ps		= 3,
	conversion_xml		= 4,
	conversion_pdf		= 5,
	conversion_fmt_text	= 6
} conversion_type;
68 
69 /* Types of encoding */
/* Every enumerator has an explicitly assigned value; they are not consecutive */
typedef enum encoding_tag {
	encoding_neutral	= 100,
	encoding_latin_1	= 801,
	encoding_latin_2	= 802,
	encoding_cyrillic	= 805,
	encoding_utf_8		= 1601
} encoding_type;
77 
78 /* Font translation table entry */
79 typedef struct font_table_tag {
80 	USHORT	usFontStyle;
81 	UCHAR	ucWordFontNumber;
82 	UCHAR	ucFFN;
83 	UCHAR	ucEmphasis;
84 	UCHAR	ucInUse;
85 	char	szWordFontname[65];
86 	char	szOurFontname[33];
87 } font_table_type;
88 
/* Options */

/*
 * Image levels, numbered consecutively from 0.
 * level_default is an alias for level_ps_2, not a separate value.
 */
typedef enum image_level_tag {
	level_gs_special	= 0,
	level_no_images		= 1,
	level_ps_2		= 2,
	level_ps_3		= 3,
	level_default		= level_ps_2
} image_level_enum;
97 
98 typedef struct options_tag {
99 	int		iParagraphBreak;
100 	conversion_type	eConversionType;
101 	BOOL		bHideHiddenText;
102 	BOOL		bRemoveRemovedText;
103 	BOOL		bUseLandscape;
104 	encoding_type	eEncoding;
105 	int		iPageHeight;		/* In points */
106 	int		iPageWidth;		/* In points */
107 	image_level_enum	eImageLevel;
108 #if defined(__riscos)
109 	BOOL		bAutofiletypeAllowed;
110 	int		iScaleFactor;		/* As a percentage */
111 #endif /* __riscos */
112 } options_type;
113 
114 /* Property Set Storage */
115 typedef struct pps_tag {
116 	ULONG	ulSB;
117 	ULONG	ulSize;
118 } pps_type;
119 typedef struct pps_info_tag {
120 	pps_type	tWordDocument;	/* Text stream */
121 	pps_type	tData;		/* Data stream */
122 	pps_type	tTable;		/* Table stream */
123 	pps_type	tSummaryInfo;	/* Summary Information */
124 	pps_type	tDocSummaryInfo;/* Document Summary Information */
125 	pps_type	t0Table;	/* Table 0 stream */
126 	pps_type	t1Table;	/* Table 1 stream */
127 } pps_info_type;
128 
129 /* Record of data block information */
130 typedef struct data_block_tag {
131 	ULONG	ulFileOffset;
132 	ULONG	ulDataPos;
133 	ULONG	ulLength;
134 } data_block_type;
135 
136 /* Record of text block information */
137 typedef struct text_block_tag {
138 	ULONG	ulFileOffset;
139 	ULONG	ulCharPos;
140 	ULONG	ulLength;
141 	BOOL	bUsesUnicode;	/* This block uses 16 bits per character */
142 	USHORT	usPropMod;
143 } text_block_type;
144 
145 /* Record of the document block information */
146 typedef struct document_block_tag {
147 	time_t	tCreateDate;		/* Unix timestamp */
148 	time_t	tRevisedDate;		/* Unix timestamp */
149 	USHORT	usDefaultTabWidth;	/* In twips */
150 	UCHAR	ucHdrFtrSpecification;
151 } document_block_type;
152 
153 /* Record of table-row block information */
154 typedef struct row_block_tag {
155 	ULONG	ulFileOffsetStart;
156 	ULONG	ulFileOffsetEnd;
157 	ULONG	ulCharPosStart;
158 	ULONG	ulCharPosEnd;
159 	short	asColumnWidth[TABLE_COLUMN_MAX+1];	/* In twips */
160 	UCHAR	ucNumberOfColumns;
161 	UCHAR	ucBorderInfo;
162 } row_block_type;
163 
/*
 * Various level types.
 * The enumerators are numbered consecutively from 0.
 */
typedef enum level_type_tag {
	level_type_none		= 0,
	level_type_outline	= 1,
	level_type_numbering	= 2,
	level_type_sequence	= 3,
	level_type_pause	= 4
} level_type_enum;
172 
/*
 * List identifiers.
 * Only the first enumerator has an explicit value; the others follow on
 * consecutively, so end_of_lists (currently 9) is always one more than
 * the enumerator before it.
 */
typedef enum list_id_tag {
	no_list = 0,		/* 0 */
	text_list,		/* 1 */
	footnote_list,		/* 2 */
	hdrftr_list,		/* 3 */
	macro_list,		/* 4 */
	annotation_list,	/* 5 */
	endnote_list,		/* 6 */
	textbox_list,		/* 7 */
	hdrtextbox_list,	/* 8 */
	end_of_lists		/* 9 */
} list_id_enum;
185 
186 /* Linked list of style description information */
187 typedef struct style_block_tag {
188 	ULONG	ulFileOffset;   /* The style start with this character */
189 	list_id_enum	eListID;/* The fileoffset is in this list */
190 	BOOL	bNumPause;
191 	BOOL	bNoRestart;	/* Don't restart by more significant levels */
192 	USHORT	usIstd;		/* Current style */
193 	USHORT	usIstdNext;	/* Next style unless overruled */
194 	USHORT	usStartAt;	/* Number at the start of a list */
195 	USHORT	usBeforeIndent;	/* Vertical indent before paragraph in twips */
196 	USHORT	usAfterIndent;	/* Vertical indent after paragraph in twips */
197 	USHORT	usListIndex;	/* Before Word 8 this field was not filled */
198 	USHORT	usListChar;	/* Character for an itemized list (Unicode) */
199 	short	sLeftIndent;	/* Left indentation in twips */
200 	short	sLeftIndent1;	/* First line left indentation in twips */
201 	short	sRightIndent;	/* Right indentation in twips */
202 	UCHAR	ucAlignment;
203 	UCHAR	ucNFC;		/* Number format code */
204 	UCHAR	ucNumLevel;
205 	UCHAR	ucListLevel;	/* Before Word 8 this field was not filled */
206 	char	szListChar[4];	/* Character for an itemized list */
207 } style_block_type;
208 
209 /* Font description information */
210 typedef struct font_block_tag {
211 	ULONG	ulFileOffset;
212 	USHORT	usFontStyle;
213 	USHORT	usFontSize;
214 	UCHAR	ucFontNumber;
215 	UCHAR	ucFontColor;
216 } font_block_type;
217 
218 /* Picture description information */
219 typedef struct picture_block_tag {
220 	ULONG	ulFileOffset;
221 	ULONG	ulFileOffsetPicture;
222 	ULONG	ulPictureOffset;
223 } picture_block_type;
224 
225 /* Section description information */
226 typedef struct section_block_tag {
227 	BOOL	bNewPage;
228 	USHORT	usNeedPrevLvl;		/* Print previous level numbers */
229 	USHORT	usHangingIndent;
230 	UCHAR	aucNFC[9];		/* Number format code */
231 	UCHAR	ucHdrFtrSpecification;	/* Which headers/footers Word < 8 */
232 } section_block_type;
233 
234 /* Header/footer description information */
235 typedef struct hdrftr_block_tag {
236 	output_type	*pText;
237 	long		lHeight;	/* In DrawUnits */
238 } hdrftr_block_type;
239 
/*
 * Footnote description information.
 * The only member is a char pointer; nothing in this header says who
 * owns the memory it points to.
 */
typedef struct footnote_block_tag {
	char	*szText;
} footnote_block_type;
244 
245 /* List description information */
246 typedef struct list_block_tag {
247 	ULONG	ulStartAt;	/* Number at the start of a list */
248 	BOOL	bNoRestart;	/* Don't restart by more significant levels */
249 	USHORT	usListChar;	/* Character for an itemized list (Unicode) */
250 	short	sLeftIndent;	/* Left indentation in twips */
251 	UCHAR	ucNFC;		/* Number format code */
252 } list_block_type;
253 
/*
 * Types of images.
 * The enumerators are numbered consecutively from 0.
 */
typedef enum imagetype_tag {
	imagetype_is_unknown	= 0,
	imagetype_is_external	= 1,
	imagetype_is_emf	= 2,
	imagetype_is_wmf	= 3,
	imagetype_is_pict	= 4,
	imagetype_is_jpeg	= 5,
	imagetype_is_png	= 6,
	imagetype_is_dib	= 7
} imagetype_enum;
265 
/*
 * Types of compression.
 * The enumerators are numbered consecutively from 0.
 */
typedef enum compression_tag {
	compression_unknown	= 0,
	compression_none	= 1,
	compression_rle4	= 2,
	compression_rle8	= 3,
	compression_jpeg	= 4,
	compression_zlib	= 5
} compression_enum;
275 
276 /* Image information */
277 typedef struct imagedata_tag {
278 	/* The type of the image */
279 	imagetype_enum	eImageType;
280 	/* Information from the Word document */
281 	size_t	tPosition;
282 	size_t	tLength;
283 	int	iHorSizeScaled;		/* Size in points */
284 	int	iVerSizeScaled;		/* Size in points */
285 	/* Information from the image */
286 	int	iWidth;			/* Size in pixels */
287 	int	iHeight;		/* Size in pixels */
288 	int	iComponents;		/* Number of color components */
289 	UINT	uiBitsPerComponent;	/* Bits per color component */
290 	BOOL	bAdobe;	/* Image includes Adobe comment marker */
291 	compression_enum	eCompression;	/* Type of compression */
292 	BOOL	bColorImage;	/* Is color image */
293 	int	iColorsUsed;	/* 0 = uses the maximum number of colors */
294 	UCHAR 	aucPalette[256][3];	/* RGB palette */
295 } imagedata_type;
296 
/* Row information results, numbered consecutively from 0 */
typedef enum row_info_tag {
	found_nothing		= 0,
	found_a_cell		= 1,
	found_not_a_cell	= 2,
	found_end_of_row	= 3,
	found_not_end_of_row	= 4
} row_info_enum;
304 
/*
 * Note types, numbered consecutively from 0.
 * Unlike the other enums in this header, the "unknown" enumerator is
 * the last one (2), not 0.
 */
typedef enum notetype_tag {
	notetype_is_footnote	= 0,
	notetype_is_endnote	= 1,
	notetype_is_unknown	= 2
} notetype_enum;
310 
/* Amount of image information, numbered consecutively from 0 */
typedef enum image_info_tag {
	image_no_information		= 0,
	image_minimal_information	= 1,
	image_full_information		= 2
} image_info_enum;
316 
317 #endif /* __wordtypes_h */
318