1*89a07cf8Schristos /* $NetBSD: ref.cpp,v 1.1.1.1 2016/01/13 18:41:49 christos Exp $ */
2*89a07cf8Schristos
3*89a07cf8Schristos // -*- C++ -*-
4*89a07cf8Schristos /* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2003
5*89a07cf8Schristos Free Software Foundation, Inc.
6*89a07cf8Schristos Written by James Clark (jjc@jclark.com)
7*89a07cf8Schristos
8*89a07cf8Schristos This file is part of groff.
9*89a07cf8Schristos
10*89a07cf8Schristos groff is free software; you can redistribute it and/or modify it under
11*89a07cf8Schristos the terms of the GNU General Public License as published by the Free
12*89a07cf8Schristos Software Foundation; either version 2, or (at your option) any later
13*89a07cf8Schristos version.
14*89a07cf8Schristos
15*89a07cf8Schristos groff is distributed in the hope that it will be useful, but WITHOUT ANY
16*89a07cf8Schristos WARRANTY; without even the implied warranty of MERCHANTABILITY or
17*89a07cf8Schristos FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18*89a07cf8Schristos for more details.
19*89a07cf8Schristos
20*89a07cf8Schristos You should have received a copy of the GNU General Public License along
21*89a07cf8Schristos with groff; see the file COPYING. If not, write to the Free Software
22*89a07cf8Schristos Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
23*89a07cf8Schristos
24*89a07cf8Schristos #include "refer.h"
25*89a07cf8Schristos #include "refid.h"
26*89a07cf8Schristos #include "ref.h"
27*89a07cf8Schristos #include "token.h"
28*89a07cf8Schristos
29*89a07cf8Schristos static const char *find_day(const char *, const char *, const char **);
30*89a07cf8Schristos static int find_month(const char *start, const char *end);
31*89a07cf8Schristos static void abbreviate_names(string &);
32*89a07cf8Schristos
33*89a07cf8Schristos #define DEFAULT_ARTICLES "the\000a\000an"
34*89a07cf8Schristos
35*89a07cf8Schristos string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));
36*89a07cf8Schristos
37*89a07cf8Schristos // Multiple occurrences of fields are separated by FIELD_SEPARATOR.
38*89a07cf8Schristos const char FIELD_SEPARATOR = '\0';
39*89a07cf8Schristos
40*89a07cf8Schristos const char MULTI_FIELD_NAMES[] = "AE";
41*89a07cf8Schristos const char *AUTHOR_FIELDS = "AQ";
42*89a07cf8Schristos
43*89a07cf8Schristos enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM };
44*89a07cf8Schristos
45*89a07cf8Schristos const char *reference_types[] = {
46*89a07cf8Schristos "other",
47*89a07cf8Schristos "journal-article",
48*89a07cf8Schristos "book",
49*89a07cf8Schristos "article-in-book",
50*89a07cf8Schristos "tech-report",
51*89a07cf8Schristos "bell-tm",
52*89a07cf8Schristos };
53*89a07cf8Schristos
54*89a07cf8Schristos static string temp_fields[256];
55*89a07cf8Schristos
reference(const char * start,int len,reference_id * ridp)56*89a07cf8Schristos reference::reference(const char *start, int len, reference_id *ridp)
57*89a07cf8Schristos : h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0),
58*89a07cf8Schristos computed_authors(0), last_needed_author(-1), nauthors(-1)
59*89a07cf8Schristos {
60*89a07cf8Schristos int i;
61*89a07cf8Schristos for (i = 0; i < 256; i++)
62*89a07cf8Schristos field_index[i] = NULL_FIELD_INDEX;
63*89a07cf8Schristos if (ridp)
64*89a07cf8Schristos rid = *ridp;
65*89a07cf8Schristos if (start == 0)
66*89a07cf8Schristos return;
67*89a07cf8Schristos if (len <= 0)
68*89a07cf8Schristos return;
69*89a07cf8Schristos const char *end = start + len;
70*89a07cf8Schristos const char *ptr = start;
71*89a07cf8Schristos assert(*ptr == '%');
72*89a07cf8Schristos while (ptr < end) {
73*89a07cf8Schristos if (ptr + 1 < end && ptr[1] != '\0'
74*89a07cf8Schristos && ((ptr[1] != '%' && ptr[1] == annotation_field)
75*89a07cf8Schristos || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0'
76*89a07cf8Schristos && discard_fields.search(ptr[2]) < 0))) {
77*89a07cf8Schristos if (ptr[1] == '%')
78*89a07cf8Schristos ptr++;
79*89a07cf8Schristos string &f = temp_fields[(unsigned char)ptr[1]];
80*89a07cf8Schristos ptr += 2;
81*89a07cf8Schristos while (ptr < end && csspace(*ptr))
82*89a07cf8Schristos ptr++;
83*89a07cf8Schristos for (;;) {
84*89a07cf8Schristos for (;;) {
85*89a07cf8Schristos if (ptr >= end) {
86*89a07cf8Schristos f += '\n';
87*89a07cf8Schristos break;
88*89a07cf8Schristos }
89*89a07cf8Schristos f += *ptr;
90*89a07cf8Schristos if (*ptr++ == '\n')
91*89a07cf8Schristos break;
92*89a07cf8Schristos }
93*89a07cf8Schristos if (ptr >= end || *ptr == '%')
94*89a07cf8Schristos break;
95*89a07cf8Schristos }
96*89a07cf8Schristos }
97*89a07cf8Schristos else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%'
98*89a07cf8Schristos && discard_fields.search(ptr[1]) < 0) {
99*89a07cf8Schristos string &f = temp_fields[(unsigned char)ptr[1]];
100*89a07cf8Schristos if (f.length() > 0) {
101*89a07cf8Schristos if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0)
102*89a07cf8Schristos f += FIELD_SEPARATOR;
103*89a07cf8Schristos else
104*89a07cf8Schristos f.clear();
105*89a07cf8Schristos }
106*89a07cf8Schristos ptr += 2;
107*89a07cf8Schristos if (ptr < end) {
108*89a07cf8Schristos if (*ptr == ' ')
109*89a07cf8Schristos ptr++;
110*89a07cf8Schristos for (;;) {
111*89a07cf8Schristos const char *p = ptr;
112*89a07cf8Schristos while (ptr < end && *ptr != '\n')
113*89a07cf8Schristos ptr++;
114*89a07cf8Schristos // strip trailing white space
115*89a07cf8Schristos const char *q = ptr;
116*89a07cf8Schristos while (q > p && q[-1] != '\n' && csspace(q[-1]))
117*89a07cf8Schristos q--;
118*89a07cf8Schristos while (p < q)
119*89a07cf8Schristos f += *p++;
120*89a07cf8Schristos if (ptr >= end)
121*89a07cf8Schristos break;
122*89a07cf8Schristos ptr++;
123*89a07cf8Schristos if (ptr >= end)
124*89a07cf8Schristos break;
125*89a07cf8Schristos if (*ptr == '%')
126*89a07cf8Schristos break;
127*89a07cf8Schristos f += ' ';
128*89a07cf8Schristos }
129*89a07cf8Schristos }
130*89a07cf8Schristos }
131*89a07cf8Schristos else {
132*89a07cf8Schristos // skip this field
133*89a07cf8Schristos for (;;) {
134*89a07cf8Schristos while (ptr < end && *ptr++ != '\n')
135*89a07cf8Schristos ;
136*89a07cf8Schristos if (ptr >= end || *ptr == '%')
137*89a07cf8Schristos break;
138*89a07cf8Schristos }
139*89a07cf8Schristos }
140*89a07cf8Schristos }
141*89a07cf8Schristos for (i = 0; i < 256; i++)
142*89a07cf8Schristos if (temp_fields[i].length() > 0)
143*89a07cf8Schristos nfields++;
144*89a07cf8Schristos field = new string[nfields];
145*89a07cf8Schristos int j = 0;
146*89a07cf8Schristos for (i = 0; i < 256; i++)
147*89a07cf8Schristos if (temp_fields[i].length() > 0) {
148*89a07cf8Schristos field[j].move(temp_fields[i]);
149*89a07cf8Schristos if (abbreviate_fields.search(i) >= 0)
150*89a07cf8Schristos abbreviate_names(field[j]);
151*89a07cf8Schristos field_index[i] = j;
152*89a07cf8Schristos j++;
153*89a07cf8Schristos }
154*89a07cf8Schristos }
155*89a07cf8Schristos
~reference()156*89a07cf8Schristos reference::~reference()
157*89a07cf8Schristos {
158*89a07cf8Schristos if (nfields > 0)
159*89a07cf8Schristos ad_delete(nfields) field;
160*89a07cf8Schristos }
161*89a07cf8Schristos
162*89a07cf8Schristos // ref is the inline, this is the database ref
163*89a07cf8Schristos
merge(reference & ref)164*89a07cf8Schristos void reference::merge(reference &ref)
165*89a07cf8Schristos {
166*89a07cf8Schristos int i;
167*89a07cf8Schristos for (i = 0; i < 256; i++)
168*89a07cf8Schristos if (field_index[i] != NULL_FIELD_INDEX)
169*89a07cf8Schristos temp_fields[i].move(field[field_index[i]]);
170*89a07cf8Schristos for (i = 0; i < 256; i++)
171*89a07cf8Schristos if (ref.field_index[i] != NULL_FIELD_INDEX)
172*89a07cf8Schristos temp_fields[i].move(ref.field[ref.field_index[i]]);
173*89a07cf8Schristos for (i = 0; i < 256; i++)
174*89a07cf8Schristos field_index[i] = NULL_FIELD_INDEX;
175*89a07cf8Schristos int old_nfields = nfields;
176*89a07cf8Schristos nfields = 0;
177*89a07cf8Schristos for (i = 0; i < 256; i++)
178*89a07cf8Schristos if (temp_fields[i].length() > 0)
179*89a07cf8Schristos nfields++;
180*89a07cf8Schristos if (nfields != old_nfields) {
181*89a07cf8Schristos if (old_nfields > 0)
182*89a07cf8Schristos ad_delete(old_nfields) field;
183*89a07cf8Schristos field = new string[nfields];
184*89a07cf8Schristos }
185*89a07cf8Schristos int j = 0;
186*89a07cf8Schristos for (i = 0; i < 256; i++)
187*89a07cf8Schristos if (temp_fields[i].length() > 0) {
188*89a07cf8Schristos field[j].move(temp_fields[i]);
189*89a07cf8Schristos field_index[i] = j;
190*89a07cf8Schristos j++;
191*89a07cf8Schristos }
192*89a07cf8Schristos merged = 1;
193*89a07cf8Schristos }
194*89a07cf8Schristos
insert_field(unsigned char c,string & s)195*89a07cf8Schristos void reference::insert_field(unsigned char c, string &s)
196*89a07cf8Schristos {
197*89a07cf8Schristos assert(s.length() > 0);
198*89a07cf8Schristos if (field_index[c] != NULL_FIELD_INDEX) {
199*89a07cf8Schristos field[field_index[c]].move(s);
200*89a07cf8Schristos return;
201*89a07cf8Schristos }
202*89a07cf8Schristos assert(field_index[c] == NULL_FIELD_INDEX);
203*89a07cf8Schristos string *old_field = field;
204*89a07cf8Schristos field = new string[nfields + 1];
205*89a07cf8Schristos int pos = 0;
206*89a07cf8Schristos int i;
207*89a07cf8Schristos for (i = 0; i < int(c); i++)
208*89a07cf8Schristos if (field_index[i] != NULL_FIELD_INDEX)
209*89a07cf8Schristos pos++;
210*89a07cf8Schristos for (i = 0; i < pos; i++)
211*89a07cf8Schristos field[i].move(old_field[i]);
212*89a07cf8Schristos field[pos].move(s);
213*89a07cf8Schristos for (i = pos; i < nfields; i++)
214*89a07cf8Schristos field[i + 1].move(old_field[i]);
215*89a07cf8Schristos if (nfields > 0)
216*89a07cf8Schristos ad_delete(nfields) old_field;
217*89a07cf8Schristos nfields++;
218*89a07cf8Schristos field_index[c] = pos;
219*89a07cf8Schristos for (i = c + 1; i < 256; i++)
220*89a07cf8Schristos if (field_index[i] != NULL_FIELD_INDEX)
221*89a07cf8Schristos field_index[i] += 1;
222*89a07cf8Schristos }
223*89a07cf8Schristos
delete_field(unsigned char c)224*89a07cf8Schristos void reference::delete_field(unsigned char c)
225*89a07cf8Schristos {
226*89a07cf8Schristos if (field_index[c] == NULL_FIELD_INDEX)
227*89a07cf8Schristos return;
228*89a07cf8Schristos string *old_field = field;
229*89a07cf8Schristos field = new string[nfields - 1];
230*89a07cf8Schristos int i;
231*89a07cf8Schristos for (i = 0; i < int(field_index[c]); i++)
232*89a07cf8Schristos field[i].move(old_field[i]);
233*89a07cf8Schristos for (i = field_index[c]; i < nfields - 1; i++)
234*89a07cf8Schristos field[i].move(old_field[i + 1]);
235*89a07cf8Schristos if (nfields > 0)
236*89a07cf8Schristos ad_delete(nfields) old_field;
237*89a07cf8Schristos nfields--;
238*89a07cf8Schristos field_index[c] = NULL_FIELD_INDEX;
239*89a07cf8Schristos for (i = c + 1; i < 256; i++)
240*89a07cf8Schristos if (field_index[i] != NULL_FIELD_INDEX)
241*89a07cf8Schristos field_index[i] -= 1;
242*89a07cf8Schristos }
243*89a07cf8Schristos
compute_hash_code()244*89a07cf8Schristos void reference::compute_hash_code()
245*89a07cf8Schristos {
246*89a07cf8Schristos if (!rid.is_null())
247*89a07cf8Schristos h = rid.hash();
248*89a07cf8Schristos else {
249*89a07cf8Schristos h = 0;
250*89a07cf8Schristos for (int i = 0; i < nfields; i++)
251*89a07cf8Schristos if (field[i].length() > 0) {
252*89a07cf8Schristos h <<= 4;
253*89a07cf8Schristos h ^= hash_string(field[i].contents(), field[i].length());
254*89a07cf8Schristos }
255*89a07cf8Schristos }
256*89a07cf8Schristos }
257*89a07cf8Schristos
set_number(int n)258*89a07cf8Schristos void reference::set_number(int n)
259*89a07cf8Schristos {
260*89a07cf8Schristos no = n;
261*89a07cf8Schristos }
262*89a07cf8Schristos
// Separator bytes used inside sort keys:
//   SORT_SEP          precedes the key of each sort field;
//   SORT_SUB_SEP      separates multiple occurrences of one field;
//   SORT_SUB_SUB_SEP  separates the parts of a single name.
const char SORT_SEP = '\001';
const char SORT_SUB_SEP = '\002';
const char SORT_SUB_SUB_SEP = '\003';
266*89a07cf8Schristos
267*89a07cf8Schristos // sep specifies additional word separators
268*89a07cf8Schristos
sortify_words(const char * s,const char * end,const char * sep,string & result)269*89a07cf8Schristos void sortify_words(const char *s, const char *end, const char *sep,
270*89a07cf8Schristos string &result)
271*89a07cf8Schristos {
272*89a07cf8Schristos int non_empty = 0;
273*89a07cf8Schristos int need_separator = 0;
274*89a07cf8Schristos for (;;) {
275*89a07cf8Schristos const char *token_start = s;
276*89a07cf8Schristos if (!get_token(&s, end))
277*89a07cf8Schristos break;
278*89a07cf8Schristos if ((s - token_start == 1
279*89a07cf8Schristos && (*token_start == ' '
280*89a07cf8Schristos || *token_start == '\n'
281*89a07cf8Schristos || (sep && *token_start != '\0'
282*89a07cf8Schristos && strchr(sep, *token_start) != 0)))
283*89a07cf8Schristos || (s - token_start == 2
284*89a07cf8Schristos && token_start[0] == '\\' && token_start[1] == ' ')) {
285*89a07cf8Schristos if (non_empty)
286*89a07cf8Schristos need_separator = 1;
287*89a07cf8Schristos }
288*89a07cf8Schristos else {
289*89a07cf8Schristos const token_info *ti = lookup_token(token_start, s);
290*89a07cf8Schristos if (ti->sortify_non_empty(token_start, s)) {
291*89a07cf8Schristos if (need_separator) {
292*89a07cf8Schristos result += ' ';
293*89a07cf8Schristos need_separator = 0;
294*89a07cf8Schristos }
295*89a07cf8Schristos ti->sortify(token_start, s, result);
296*89a07cf8Schristos non_empty = 1;
297*89a07cf8Schristos }
298*89a07cf8Schristos }
299*89a07cf8Schristos }
300*89a07cf8Schristos }
301*89a07cf8Schristos
sortify_word(const char * s,const char * end,string & result)302*89a07cf8Schristos void sortify_word(const char *s, const char *end, string &result)
303*89a07cf8Schristos {
304*89a07cf8Schristos for (;;) {
305*89a07cf8Schristos const char *token_start = s;
306*89a07cf8Schristos if (!get_token(&s, end))
307*89a07cf8Schristos break;
308*89a07cf8Schristos const token_info *ti = lookup_token(token_start, s);
309*89a07cf8Schristos ti->sortify(token_start, s, result);
310*89a07cf8Schristos }
311*89a07cf8Schristos }
312*89a07cf8Schristos
sortify_other(const char * s,int len,string & key)313*89a07cf8Schristos void sortify_other(const char *s, int len, string &key)
314*89a07cf8Schristos {
315*89a07cf8Schristos sortify_words(s, s + len, 0, key);
316*89a07cf8Schristos }
317*89a07cf8Schristos
sortify_title(const char * s,int len,string & key)318*89a07cf8Schristos void sortify_title(const char *s, int len, string &key)
319*89a07cf8Schristos {
320*89a07cf8Schristos const char *end = s + len;
321*89a07cf8Schristos for (; s < end && (*s == ' ' || *s == '\n'); s++)
322*89a07cf8Schristos ;
323*89a07cf8Schristos const char *ptr = s;
324*89a07cf8Schristos for (;;) {
325*89a07cf8Schristos const char *token_start = ptr;
326*89a07cf8Schristos if (!get_token(&ptr, end))
327*89a07cf8Schristos break;
328*89a07cf8Schristos if (ptr - token_start == 1
329*89a07cf8Schristos && (*token_start == ' ' || *token_start == '\n'))
330*89a07cf8Schristos break;
331*89a07cf8Schristos }
332*89a07cf8Schristos if (ptr < end) {
333*89a07cf8Schristos unsigned int first_word_len = ptr - s - 1;
334*89a07cf8Schristos const char *ae = articles.contents() + articles.length();
335*89a07cf8Schristos for (const char *a = articles.contents();
336*89a07cf8Schristos a < ae;
337*89a07cf8Schristos a = strchr(a, '\0') + 1)
338*89a07cf8Schristos if (first_word_len == strlen(a)) {
339*89a07cf8Schristos unsigned int j;
340*89a07cf8Schristos for (j = 0; j < first_word_len; j++)
341*89a07cf8Schristos if (a[j] != cmlower(s[j]))
342*89a07cf8Schristos break;
343*89a07cf8Schristos if (j >= first_word_len) {
344*89a07cf8Schristos s = ptr;
345*89a07cf8Schristos for (; s < end && (*s == ' ' || *s == '\n'); s++)
346*89a07cf8Schristos ;
347*89a07cf8Schristos break;
348*89a07cf8Schristos }
349*89a07cf8Schristos }
350*89a07cf8Schristos }
351*89a07cf8Schristos sortify_words(s, end, 0, key);
352*89a07cf8Schristos }
353*89a07cf8Schristos
sortify_name(const char * s,int len,string & key)354*89a07cf8Schristos void sortify_name(const char *s, int len, string &key)
355*89a07cf8Schristos {
356*89a07cf8Schristos const char *last_name_end;
357*89a07cf8Schristos const char *last_name = find_last_name(s, s + len, &last_name_end);
358*89a07cf8Schristos sortify_word(last_name, last_name_end, key);
359*89a07cf8Schristos key += SORT_SUB_SUB_SEP;
360*89a07cf8Schristos if (last_name > s)
361*89a07cf8Schristos sortify_words(s, last_name, ".", key);
362*89a07cf8Schristos key += SORT_SUB_SUB_SEP;
363*89a07cf8Schristos if (last_name_end < s + len)
364*89a07cf8Schristos sortify_words(last_name_end, s + len, ".,", key);
365*89a07cf8Schristos }
366*89a07cf8Schristos
sortify_date(const char * s,int len,string & key)367*89a07cf8Schristos void sortify_date(const char *s, int len, string &key)
368*89a07cf8Schristos {
369*89a07cf8Schristos const char *year_end;
370*89a07cf8Schristos const char *year_start = find_year(s, s + len, &year_end);
371*89a07cf8Schristos if (!year_start) {
372*89a07cf8Schristos // Things without years are often `forthcoming', so it makes sense
373*89a07cf8Schristos // that they sort after things with explicit years.
374*89a07cf8Schristos key += 'A';
375*89a07cf8Schristos sortify_words(s, s + len, 0, key);
376*89a07cf8Schristos return;
377*89a07cf8Schristos }
378*89a07cf8Schristos int n = year_end - year_start;
379*89a07cf8Schristos while (n < 4) {
380*89a07cf8Schristos key += '0';
381*89a07cf8Schristos n++;
382*89a07cf8Schristos }
383*89a07cf8Schristos while (year_start < year_end)
384*89a07cf8Schristos key += *year_start++;
385*89a07cf8Schristos int m = find_month(s, s + len);
386*89a07cf8Schristos if (m < 0)
387*89a07cf8Schristos return;
388*89a07cf8Schristos key += 'A' + m;
389*89a07cf8Schristos const char *day_end;
390*89a07cf8Schristos const char *day_start = find_day(s, s + len, &day_end);
391*89a07cf8Schristos if (!day_start)
392*89a07cf8Schristos return;
393*89a07cf8Schristos if (day_end - day_start == 1)
394*89a07cf8Schristos key += '0';
395*89a07cf8Schristos while (day_start < day_end)
396*89a07cf8Schristos key += *day_start++;
397*89a07cf8Schristos }
398*89a07cf8Schristos
399*89a07cf8Schristos // SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification.
400*89a07cf8Schristos
sortify_label(const char * s,int len,string & key)401*89a07cf8Schristos void sortify_label(const char *s, int len, string &key)
402*89a07cf8Schristos {
403*89a07cf8Schristos const char *end = s + len;
404*89a07cf8Schristos for (;;) {
405*89a07cf8Schristos const char *ptr;
406*89a07cf8Schristos for (ptr = s;
407*89a07cf8Schristos ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP;
408*89a07cf8Schristos ptr++)
409*89a07cf8Schristos ;
410*89a07cf8Schristos if (ptr > s)
411*89a07cf8Schristos sortify_words(s, ptr, 0, key);
412*89a07cf8Schristos s = ptr;
413*89a07cf8Schristos if (s >= end)
414*89a07cf8Schristos break;
415*89a07cf8Schristos key += *s++;
416*89a07cf8Schristos }
417*89a07cf8Schristos }
418*89a07cf8Schristos
compute_sort_key()419*89a07cf8Schristos void reference::compute_sort_key()
420*89a07cf8Schristos {
421*89a07cf8Schristos if (sort_fields.length() == 0)
422*89a07cf8Schristos return;
423*89a07cf8Schristos sort_fields += '\0';
424*89a07cf8Schristos const char *sf = sort_fields.contents();
425*89a07cf8Schristos while (*sf != '\0') {
426*89a07cf8Schristos sort_key += SORT_SEP;
427*89a07cf8Schristos char f = *sf++;
428*89a07cf8Schristos int n = 1;
429*89a07cf8Schristos if (*sf == '+') {
430*89a07cf8Schristos n = INT_MAX;
431*89a07cf8Schristos sf++;
432*89a07cf8Schristos }
433*89a07cf8Schristos else if (csdigit(*sf)) {
434*89a07cf8Schristos char *ptr;
435*89a07cf8Schristos long l = strtol(sf, &ptr, 10);
436*89a07cf8Schristos if (l == 0 && ptr == sf)
437*89a07cf8Schristos ;
438*89a07cf8Schristos else {
439*89a07cf8Schristos sf = ptr;
440*89a07cf8Schristos if (l < 0) {
441*89a07cf8Schristos n = 1;
442*89a07cf8Schristos }
443*89a07cf8Schristos else {
444*89a07cf8Schristos n = int(l);
445*89a07cf8Schristos }
446*89a07cf8Schristos }
447*89a07cf8Schristos }
448*89a07cf8Schristos if (f == '.')
449*89a07cf8Schristos sortify_label(label.contents(), label.length(), sort_key);
450*89a07cf8Schristos else if (f == AUTHOR_FIELDS[0])
451*89a07cf8Schristos sortify_authors(n, sort_key);
452*89a07cf8Schristos else
453*89a07cf8Schristos sortify_field(f, n, sort_key);
454*89a07cf8Schristos }
455*89a07cf8Schristos sort_fields.set_length(sort_fields.length() - 1);
456*89a07cf8Schristos }
457*89a07cf8Schristos
sortify_authors(int n,string & result) const458*89a07cf8Schristos void reference::sortify_authors(int n, string &result) const
459*89a07cf8Schristos {
460*89a07cf8Schristos for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++)
461*89a07cf8Schristos if (contains_field(*p)) {
462*89a07cf8Schristos sortify_field(*p, n, result);
463*89a07cf8Schristos return;
464*89a07cf8Schristos }
465*89a07cf8Schristos sortify_field(AUTHOR_FIELDS[0], n, result);
466*89a07cf8Schristos }
467*89a07cf8Schristos
canonicalize_authors(string & result) const468*89a07cf8Schristos void reference::canonicalize_authors(string &result) const
469*89a07cf8Schristos {
470*89a07cf8Schristos int len = result.length();
471*89a07cf8Schristos sortify_authors(INT_MAX, result);
472*89a07cf8Schristos if (result.length() > len)
473*89a07cf8Schristos result += SORT_SUB_SEP;
474*89a07cf8Schristos }
475*89a07cf8Schristos
sortify_field(unsigned char f,int n,string & result) const476*89a07cf8Schristos void reference::sortify_field(unsigned char f, int n, string &result) const
477*89a07cf8Schristos {
478*89a07cf8Schristos typedef void (*sortify_t)(const char *, int, string &);
479*89a07cf8Schristos sortify_t sortifier = sortify_other;
480*89a07cf8Schristos switch (f) {
481*89a07cf8Schristos case 'A':
482*89a07cf8Schristos case 'E':
483*89a07cf8Schristos sortifier = sortify_name;
484*89a07cf8Schristos break;
485*89a07cf8Schristos case 'D':
486*89a07cf8Schristos sortifier = sortify_date;
487*89a07cf8Schristos break;
488*89a07cf8Schristos case 'B':
489*89a07cf8Schristos case 'J':
490*89a07cf8Schristos case 'T':
491*89a07cf8Schristos sortifier = sortify_title;
492*89a07cf8Schristos break;
493*89a07cf8Schristos }
494*89a07cf8Schristos int fi = field_index[(unsigned char)f];
495*89a07cf8Schristos if (fi != NULL_FIELD_INDEX) {
496*89a07cf8Schristos string &str = field[fi];
497*89a07cf8Schristos const char *start = str.contents();
498*89a07cf8Schristos const char *end = start + str.length();
499*89a07cf8Schristos for (int i = 0; i < n && start < end; i++) {
500*89a07cf8Schristos const char *p = start;
501*89a07cf8Schristos while (start < end && *start != FIELD_SEPARATOR)
502*89a07cf8Schristos start++;
503*89a07cf8Schristos if (i > 0)
504*89a07cf8Schristos result += SORT_SUB_SEP;
505*89a07cf8Schristos (*sortifier)(p, start - p, result);
506*89a07cf8Schristos if (start < end)
507*89a07cf8Schristos start++;
508*89a07cf8Schristos }
509*89a07cf8Schristos }
510*89a07cf8Schristos }
511*89a07cf8Schristos
compare_reference(const reference & r1,const reference & r2)512*89a07cf8Schristos int compare_reference(const reference &r1, const reference &r2)
513*89a07cf8Schristos {
514*89a07cf8Schristos assert(r1.no >= 0);
515*89a07cf8Schristos assert(r2.no >= 0);
516*89a07cf8Schristos const char *s1 = r1.sort_key.contents();
517*89a07cf8Schristos int n1 = r1.sort_key.length();
518*89a07cf8Schristos const char *s2 = r2.sort_key.contents();
519*89a07cf8Schristos int n2 = r2.sort_key.length();
520*89a07cf8Schristos for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2)
521*89a07cf8Schristos if (*s1 != *s2)
522*89a07cf8Schristos return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
523*89a07cf8Schristos if (n2 > 0)
524*89a07cf8Schristos return -1;
525*89a07cf8Schristos if (n1 > 0)
526*89a07cf8Schristos return 1;
527*89a07cf8Schristos return r1.no - r2.no;
528*89a07cf8Schristos }
529*89a07cf8Schristos
same_reference(const reference & r1,const reference & r2)530*89a07cf8Schristos int same_reference(const reference &r1, const reference &r2)
531*89a07cf8Schristos {
532*89a07cf8Schristos if (!r1.rid.is_null() && r1.rid == r2.rid)
533*89a07cf8Schristos return 1;
534*89a07cf8Schristos if (r1.h != r2.h)
535*89a07cf8Schristos return 0;
536*89a07cf8Schristos if (r1.nfields != r2.nfields)
537*89a07cf8Schristos return 0;
538*89a07cf8Schristos int i = 0;
539*89a07cf8Schristos for (i = 0; i < 256; i++)
540*89a07cf8Schristos if (r1.field_index != r2.field_index)
541*89a07cf8Schristos return 0;
542*89a07cf8Schristos for (i = 0; i < r1.nfields; i++)
543*89a07cf8Schristos if (r1.field[i] != r2.field[i])
544*89a07cf8Schristos return 0;
545*89a07cf8Schristos return 1;
546*89a07cf8Schristos }
547*89a07cf8Schristos
find_last_name(const char * start,const char * end,const char ** endp)548*89a07cf8Schristos const char *find_last_name(const char *start, const char *end,
549*89a07cf8Schristos const char **endp)
550*89a07cf8Schristos {
551*89a07cf8Schristos const char *ptr = start;
552*89a07cf8Schristos const char *last_word = start;
553*89a07cf8Schristos for (;;) {
554*89a07cf8Schristos const char *token_start = ptr;
555*89a07cf8Schristos if (!get_token(&ptr, end))
556*89a07cf8Schristos break;
557*89a07cf8Schristos if (ptr - token_start == 1) {
558*89a07cf8Schristos if (*token_start == ',') {
559*89a07cf8Schristos *endp = token_start;
560*89a07cf8Schristos return last_word;
561*89a07cf8Schristos }
562*89a07cf8Schristos else if (*token_start == ' ' || *token_start == '\n') {
563*89a07cf8Schristos if (ptr < end && *ptr != ' ' && *ptr != '\n')
564*89a07cf8Schristos last_word = ptr;
565*89a07cf8Schristos }
566*89a07cf8Schristos }
567*89a07cf8Schristos }
568*89a07cf8Schristos *endp = end;
569*89a07cf8Schristos return last_word;
570*89a07cf8Schristos }
571*89a07cf8Schristos
abbreviate_name(const char * ptr,const char * end,string & result)572*89a07cf8Schristos void abbreviate_name(const char *ptr, const char *end, string &result)
573*89a07cf8Schristos {
574*89a07cf8Schristos const char *last_name_end;
575*89a07cf8Schristos const char *last_name_start = find_last_name(ptr, end, &last_name_end);
576*89a07cf8Schristos int need_period = 0;
577*89a07cf8Schristos for (;;) {
578*89a07cf8Schristos const char *token_start = ptr;
579*89a07cf8Schristos if (!get_token(&ptr, last_name_start))
580*89a07cf8Schristos break;
581*89a07cf8Schristos const token_info *ti = lookup_token(token_start, ptr);
582*89a07cf8Schristos if (need_period) {
583*89a07cf8Schristos if ((ptr - token_start == 1 && *token_start == ' ')
584*89a07cf8Schristos || (ptr - token_start == 2 && token_start[0] == '\\'
585*89a07cf8Schristos && token_start[1] == ' '))
586*89a07cf8Schristos continue;
587*89a07cf8Schristos if (ti->is_upper())
588*89a07cf8Schristos result += period_before_initial;
589*89a07cf8Schristos else
590*89a07cf8Schristos result += period_before_other;
591*89a07cf8Schristos need_period = 0;
592*89a07cf8Schristos }
593*89a07cf8Schristos result.append(token_start, ptr - token_start);
594*89a07cf8Schristos if (ti->is_upper()) {
595*89a07cf8Schristos const char *lower_ptr = ptr;
596*89a07cf8Schristos int first_token = 1;
597*89a07cf8Schristos for (;;) {
598*89a07cf8Schristos token_start = ptr;
599*89a07cf8Schristos if (!get_token(&ptr, last_name_start))
600*89a07cf8Schristos break;
601*89a07cf8Schristos if ((ptr - token_start == 1 && *token_start == ' ')
602*89a07cf8Schristos || (ptr - token_start == 2 && token_start[0] == '\\'
603*89a07cf8Schristos && token_start[1] == ' '))
604*89a07cf8Schristos break;
605*89a07cf8Schristos ti = lookup_token(token_start, ptr);
606*89a07cf8Schristos if (ti->is_hyphen()) {
607*89a07cf8Schristos const char *ptr1 = ptr;
608*89a07cf8Schristos if (get_token(&ptr1, last_name_start)) {
609*89a07cf8Schristos ti = lookup_token(ptr, ptr1);
610*89a07cf8Schristos if (ti->is_upper()) {
611*89a07cf8Schristos result += period_before_hyphen;
612*89a07cf8Schristos result.append(token_start, ptr1 - token_start);
613*89a07cf8Schristos ptr = ptr1;
614*89a07cf8Schristos }
615*89a07cf8Schristos }
616*89a07cf8Schristos }
617*89a07cf8Schristos else if (ti->is_upper()) {
618*89a07cf8Schristos // MacDougal -> MacD.
619*89a07cf8Schristos result.append(lower_ptr, ptr - lower_ptr);
620*89a07cf8Schristos lower_ptr = ptr;
621*89a07cf8Schristos first_token = 1;
622*89a07cf8Schristos }
623*89a07cf8Schristos else if (first_token && ti->is_accent()) {
624*89a07cf8Schristos result.append(token_start, ptr - token_start);
625*89a07cf8Schristos lower_ptr = ptr;
626*89a07cf8Schristos }
627*89a07cf8Schristos first_token = 0;
628*89a07cf8Schristos }
629*89a07cf8Schristos need_period = 1;
630*89a07cf8Schristos }
631*89a07cf8Schristos }
632*89a07cf8Schristos if (need_period)
633*89a07cf8Schristos result += period_before_last_name;
634*89a07cf8Schristos result.append(last_name_start, end - last_name_start);
635*89a07cf8Schristos }
636*89a07cf8Schristos
abbreviate_names(string & result)637*89a07cf8Schristos static void abbreviate_names(string &result)
638*89a07cf8Schristos {
639*89a07cf8Schristos string str;
640*89a07cf8Schristos str.move(result);
641*89a07cf8Schristos const char *ptr = str.contents();
642*89a07cf8Schristos const char *end = ptr + str.length();
643*89a07cf8Schristos while (ptr < end) {
644*89a07cf8Schristos const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
645*89a07cf8Schristos if (name_end == 0)
646*89a07cf8Schristos name_end = end;
647*89a07cf8Schristos abbreviate_name(ptr, name_end, result);
648*89a07cf8Schristos if (name_end >= end)
649*89a07cf8Schristos break;
650*89a07cf8Schristos ptr = name_end + 1;
651*89a07cf8Schristos result += FIELD_SEPARATOR;
652*89a07cf8Schristos }
653*89a07cf8Schristos }
654*89a07cf8Schristos
reverse_name(const char * ptr,const char * name_end,string & result)655*89a07cf8Schristos void reverse_name(const char *ptr, const char *name_end, string &result)
656*89a07cf8Schristos {
657*89a07cf8Schristos const char *last_name_end;
658*89a07cf8Schristos const char *last_name_start = find_last_name(ptr, name_end, &last_name_end);
659*89a07cf8Schristos result.append(last_name_start, last_name_end - last_name_start);
660*89a07cf8Schristos while (last_name_start > ptr
661*89a07cf8Schristos && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n'))
662*89a07cf8Schristos last_name_start--;
663*89a07cf8Schristos if (last_name_start > ptr) {
664*89a07cf8Schristos result += ", ";
665*89a07cf8Schristos result.append(ptr, last_name_start - ptr);
666*89a07cf8Schristos }
667*89a07cf8Schristos if (last_name_end < name_end)
668*89a07cf8Schristos result.append(last_name_end, name_end - last_name_end);
669*89a07cf8Schristos }
670*89a07cf8Schristos
reverse_names(string & result,int n)671*89a07cf8Schristos void reverse_names(string &result, int n)
672*89a07cf8Schristos {
673*89a07cf8Schristos if (n <= 0)
674*89a07cf8Schristos return;
675*89a07cf8Schristos string str;
676*89a07cf8Schristos str.move(result);
677*89a07cf8Schristos const char *ptr = str.contents();
678*89a07cf8Schristos const char *end = ptr + str.length();
679*89a07cf8Schristos while (ptr < end) {
680*89a07cf8Schristos if (--n < 0) {
681*89a07cf8Schristos result.append(ptr, end - ptr);
682*89a07cf8Schristos break;
683*89a07cf8Schristos }
684*89a07cf8Schristos const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
685*89a07cf8Schristos if (name_end == 0)
686*89a07cf8Schristos name_end = end;
687*89a07cf8Schristos reverse_name(ptr, name_end, result);
688*89a07cf8Schristos if (name_end >= end)
689*89a07cf8Schristos break;
690*89a07cf8Schristos ptr = name_end + 1;
691*89a07cf8Schristos result += FIELD_SEPARATOR;
692*89a07cf8Schristos }
693*89a07cf8Schristos }
694*89a07cf8Schristos
695*89a07cf8Schristos // Return number of field separators.
696*89a07cf8Schristos
join_fields(string & f)697*89a07cf8Schristos int join_fields(string &f)
698*89a07cf8Schristos {
699*89a07cf8Schristos const char *ptr = f.contents();
700*89a07cf8Schristos int len = f.length();
701*89a07cf8Schristos int nfield_seps = 0;
702*89a07cf8Schristos int j;
703*89a07cf8Schristos for (j = 0; j < len; j++)
704*89a07cf8Schristos if (ptr[j] == FIELD_SEPARATOR)
705*89a07cf8Schristos nfield_seps++;
706*89a07cf8Schristos if (nfield_seps == 0)
707*89a07cf8Schristos return 0;
708*89a07cf8Schristos string temp;
709*89a07cf8Schristos int field_seps_left = nfield_seps;
710*89a07cf8Schristos for (j = 0; j < len; j++) {
711*89a07cf8Schristos if (ptr[j] == FIELD_SEPARATOR) {
712*89a07cf8Schristos if (nfield_seps == 1)
713*89a07cf8Schristos temp += join_authors_exactly_two;
714*89a07cf8Schristos else if (--field_seps_left == 0)
715*89a07cf8Schristos temp += join_authors_last_two;
716*89a07cf8Schristos else
717*89a07cf8Schristos temp += join_authors_default;
718*89a07cf8Schristos }
719*89a07cf8Schristos else
720*89a07cf8Schristos temp += ptr[j];
721*89a07cf8Schristos }
722*89a07cf8Schristos f = temp;
723*89a07cf8Schristos return nfield_seps;
724*89a07cf8Schristos }
725*89a07cf8Schristos
uppercase(const char * start,const char * end,string & result)726*89a07cf8Schristos void uppercase(const char *start, const char *end, string &result)
727*89a07cf8Schristos {
728*89a07cf8Schristos for (;;) {
729*89a07cf8Schristos const char *token_start = start;
730*89a07cf8Schristos if (!get_token(&start, end))
731*89a07cf8Schristos break;
732*89a07cf8Schristos const token_info *ti = lookup_token(token_start, start);
733*89a07cf8Schristos ti->upper_case(token_start, start, result);
734*89a07cf8Schristos }
735*89a07cf8Schristos }
736*89a07cf8Schristos
lowercase(const char * start,const char * end,string & result)737*89a07cf8Schristos void lowercase(const char *start, const char *end, string &result)
738*89a07cf8Schristos {
739*89a07cf8Schristos for (;;) {
740*89a07cf8Schristos const char *token_start = start;
741*89a07cf8Schristos if (!get_token(&start, end))
742*89a07cf8Schristos break;
743*89a07cf8Schristos const token_info *ti = lookup_token(token_start, start);
744*89a07cf8Schristos ti->lower_case(token_start, start, result);
745*89a07cf8Schristos }
746*89a07cf8Schristos }
747*89a07cf8Schristos
capitalize(const char * ptr,const char * end,string & result)748*89a07cf8Schristos void capitalize(const char *ptr, const char *end, string &result)
749*89a07cf8Schristos {
750*89a07cf8Schristos int in_small_point_size = 0;
751*89a07cf8Schristos for (;;) {
752*89a07cf8Schristos const char *start = ptr;
753*89a07cf8Schristos if (!get_token(&ptr, end))
754*89a07cf8Schristos break;
755*89a07cf8Schristos const token_info *ti = lookup_token(start, ptr);
756*89a07cf8Schristos const char *char_end = ptr;
757*89a07cf8Schristos int is_lower = ti->is_lower();
758*89a07cf8Schristos if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) {
759*89a07cf8Schristos const token_info *ti2 = lookup_token(char_end, ptr);
760*89a07cf8Schristos if (!ti2->is_accent())
761*89a07cf8Schristos ptr = char_end;
762*89a07cf8Schristos }
763*89a07cf8Schristos if (is_lower) {
764*89a07cf8Schristos if (!in_small_point_size) {
765*89a07cf8Schristos result += "\\s-2";
766*89a07cf8Schristos in_small_point_size = 1;
767*89a07cf8Schristos }
768*89a07cf8Schristos ti->upper_case(start, char_end, result);
769*89a07cf8Schristos result.append(char_end, ptr - char_end);
770*89a07cf8Schristos }
771*89a07cf8Schristos else {
772*89a07cf8Schristos if (in_small_point_size) {
773*89a07cf8Schristos result += "\\s+2";
774*89a07cf8Schristos in_small_point_size = 0;
775*89a07cf8Schristos }
776*89a07cf8Schristos result.append(start, ptr - start);
777*89a07cf8Schristos }
778*89a07cf8Schristos }
779*89a07cf8Schristos if (in_small_point_size)
780*89a07cf8Schristos result += "\\s+2";
781*89a07cf8Schristos }
782*89a07cf8Schristos
capitalize_field(string & str)783*89a07cf8Schristos void capitalize_field(string &str)
784*89a07cf8Schristos {
785*89a07cf8Schristos string temp;
786*89a07cf8Schristos capitalize(str.contents(), str.contents() + str.length(), temp);
787*89a07cf8Schristos str.move(temp);
788*89a07cf8Schristos }
789*89a07cf8Schristos
is_terminated(const char * ptr,const char * end)790*89a07cf8Schristos int is_terminated(const char *ptr, const char *end)
791*89a07cf8Schristos {
792*89a07cf8Schristos const char *last_token = end;
793*89a07cf8Schristos for (;;) {
794*89a07cf8Schristos const char *p = ptr;
795*89a07cf8Schristos if (!get_token(&ptr, end))
796*89a07cf8Schristos break;
797*89a07cf8Schristos last_token = p;
798*89a07cf8Schristos }
799*89a07cf8Schristos return end - last_token == 1
800*89a07cf8Schristos && (*last_token == '.' || *last_token == '!' || *last_token == '?');
801*89a07cf8Schristos }
802*89a07cf8Schristos
output(FILE * fp)803*89a07cf8Schristos void reference::output(FILE *fp)
804*89a07cf8Schristos {
805*89a07cf8Schristos fputs(".]-\n", fp);
806*89a07cf8Schristos for (int i = 0; i < 256; i++)
807*89a07cf8Schristos if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) {
808*89a07cf8Schristos string &f = field[field_index[i]];
809*89a07cf8Schristos if (!csdigit(i)) {
810*89a07cf8Schristos int j = reverse_fields.search(i);
811*89a07cf8Schristos if (j >= 0) {
812*89a07cf8Schristos int n;
813*89a07cf8Schristos int len = reverse_fields.length();
814*89a07cf8Schristos if (++j < len && csdigit(reverse_fields[j])) {
815*89a07cf8Schristos n = reverse_fields[j] - '0';
816*89a07cf8Schristos for (++j; j < len && csdigit(reverse_fields[j]); j++)
817*89a07cf8Schristos // should check for overflow
818*89a07cf8Schristos n = n*10 + reverse_fields[j] - '0';
819*89a07cf8Schristos }
820*89a07cf8Schristos else
821*89a07cf8Schristos n = INT_MAX;
822*89a07cf8Schristos reverse_names(f, n);
823*89a07cf8Schristos }
824*89a07cf8Schristos }
825*89a07cf8Schristos int is_multiple = join_fields(f) > 0;
826*89a07cf8Schristos if (capitalize_fields.search(i) >= 0)
827*89a07cf8Schristos capitalize_field(f);
828*89a07cf8Schristos if (memchr(f.contents(), '\n', f.length()) == 0) {
829*89a07cf8Schristos fprintf(fp, ".ds [%c ", i);
830*89a07cf8Schristos if (f[0] == ' ' || f[0] == '\\' || f[0] == '"')
831*89a07cf8Schristos putc('"', fp);
832*89a07cf8Schristos put_string(f, fp);
833*89a07cf8Schristos putc('\n', fp);
834*89a07cf8Schristos }
835*89a07cf8Schristos else {
836*89a07cf8Schristos fprintf(fp, ".de [%c\n", i);
837*89a07cf8Schristos put_string(f, fp);
838*89a07cf8Schristos fputs("..\n", fp);
839*89a07cf8Schristos }
840*89a07cf8Schristos if (i == 'P') {
841*89a07cf8Schristos int multiple_pages = 0;
842*89a07cf8Schristos const char *s = f.contents();
843*89a07cf8Schristos const char *end = f.contents() + f.length();
844*89a07cf8Schristos for (;;) {
845*89a07cf8Schristos const char *token_start = s;
846*89a07cf8Schristos if (!get_token(&s, end))
847*89a07cf8Schristos break;
848*89a07cf8Schristos const token_info *ti = lookup_token(token_start, s);
849*89a07cf8Schristos if (ti->is_hyphen() || ti->is_range_sep()) {
850*89a07cf8Schristos multiple_pages = 1;
851*89a07cf8Schristos break;
852*89a07cf8Schristos }
853*89a07cf8Schristos }
854*89a07cf8Schristos fprintf(fp, ".nr [P %d\n", multiple_pages);
855*89a07cf8Schristos }
856*89a07cf8Schristos else if (i == 'E')
857*89a07cf8Schristos fprintf(fp, ".nr [E %d\n", is_multiple);
858*89a07cf8Schristos }
859*89a07cf8Schristos for (const char *p = "TAO"; *p; p++) {
860*89a07cf8Schristos int fi = field_index[(unsigned char)*p];
861*89a07cf8Schristos if (fi != NULL_FIELD_INDEX) {
862*89a07cf8Schristos string &f = field[fi];
863*89a07cf8Schristos fprintf(fp, ".nr [%c %d\n", *p,
864*89a07cf8Schristos is_terminated(f.contents(), f.contents() + f.length()));
865*89a07cf8Schristos }
866*89a07cf8Schristos }
867*89a07cf8Schristos int t = classify();
868*89a07cf8Schristos fprintf(fp, ".][ %d %s\n", t, reference_types[t]);
869*89a07cf8Schristos if (annotation_macro.length() > 0 && annotation_field >= 0
870*89a07cf8Schristos && field_index[annotation_field] != NULL_FIELD_INDEX) {
871*89a07cf8Schristos putc('.', fp);
872*89a07cf8Schristos put_string(annotation_macro, fp);
873*89a07cf8Schristos putc('\n', fp);
874*89a07cf8Schristos put_string(field[field_index[annotation_field]], fp);
875*89a07cf8Schristos }
876*89a07cf8Schristos }
877*89a07cf8Schristos
print_sort_key_comment(FILE * fp)878*89a07cf8Schristos void reference::print_sort_key_comment(FILE *fp)
879*89a07cf8Schristos {
880*89a07cf8Schristos fputs(".\\\"", fp);
881*89a07cf8Schristos put_string(sort_key, fp);
882*89a07cf8Schristos putc('\n', fp);
883*89a07cf8Schristos }
884*89a07cf8Schristos
find_year(const char * start,const char * end,const char ** endp)885*89a07cf8Schristos const char *find_year(const char *start, const char *end, const char **endp)
886*89a07cf8Schristos {
887*89a07cf8Schristos for (;;) {
888*89a07cf8Schristos while (start < end && !csdigit(*start))
889*89a07cf8Schristos start++;
890*89a07cf8Schristos const char *ptr = start;
891*89a07cf8Schristos if (start == end)
892*89a07cf8Schristos break;
893*89a07cf8Schristos while (ptr < end && csdigit(*ptr))
894*89a07cf8Schristos ptr++;
895*89a07cf8Schristos if (ptr - start == 4 || ptr - start == 3
896*89a07cf8Schristos || (ptr - start == 2
897*89a07cf8Schristos && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) {
898*89a07cf8Schristos *endp = ptr;
899*89a07cf8Schristos return start;
900*89a07cf8Schristos }
901*89a07cf8Schristos start = ptr;
902*89a07cf8Schristos }
903*89a07cf8Schristos return 0;
904*89a07cf8Schristos }
905*89a07cf8Schristos
find_day(const char * start,const char * end,const char ** endp)906*89a07cf8Schristos static const char *find_day(const char *start, const char *end,
907*89a07cf8Schristos const char **endp)
908*89a07cf8Schristos {
909*89a07cf8Schristos for (;;) {
910*89a07cf8Schristos while (start < end && !csdigit(*start))
911*89a07cf8Schristos start++;
912*89a07cf8Schristos const char *ptr = start;
913*89a07cf8Schristos if (start == end)
914*89a07cf8Schristos break;
915*89a07cf8Schristos while (ptr < end && csdigit(*ptr))
916*89a07cf8Schristos ptr++;
917*89a07cf8Schristos if ((ptr - start == 1 && start[0] != '0')
918*89a07cf8Schristos || (ptr - start == 2 &&
919*89a07cf8Schristos (start[0] == '1'
920*89a07cf8Schristos || start[0] == '2'
921*89a07cf8Schristos || (start[0] == '3' && start[1] <= '1')
922*89a07cf8Schristos || (start[0] == '0' && start[1] != '0')))) {
923*89a07cf8Schristos *endp = ptr;
924*89a07cf8Schristos return start;
925*89a07cf8Schristos }
926*89a07cf8Schristos start = ptr;
927*89a07cf8Schristos }
928*89a07cf8Schristos return 0;
929*89a07cf8Schristos }
930*89a07cf8Schristos
find_month(const char * start,const char * end)931*89a07cf8Schristos static int find_month(const char *start, const char *end)
932*89a07cf8Schristos {
933*89a07cf8Schristos static const char *months[] = {
934*89a07cf8Schristos "january",
935*89a07cf8Schristos "february",
936*89a07cf8Schristos "march",
937*89a07cf8Schristos "april",
938*89a07cf8Schristos "may",
939*89a07cf8Schristos "june",
940*89a07cf8Schristos "july",
941*89a07cf8Schristos "august",
942*89a07cf8Schristos "september",
943*89a07cf8Schristos "october",
944*89a07cf8Schristos "november",
945*89a07cf8Schristos "december",
946*89a07cf8Schristos };
947*89a07cf8Schristos for (;;) {
948*89a07cf8Schristos while (start < end && !csalpha(*start))
949*89a07cf8Schristos start++;
950*89a07cf8Schristos const char *ptr = start;
951*89a07cf8Schristos if (start == end)
952*89a07cf8Schristos break;
953*89a07cf8Schristos while (ptr < end && csalpha(*ptr))
954*89a07cf8Schristos ptr++;
955*89a07cf8Schristos if (ptr - start >= 3) {
956*89a07cf8Schristos for (unsigned int i = 0; i < sizeof(months)/sizeof(months[0]); i++) {
957*89a07cf8Schristos const char *q = months[i];
958*89a07cf8Schristos const char *p = start;
959*89a07cf8Schristos for (; p < ptr; p++, q++)
960*89a07cf8Schristos if (cmlower(*p) != *q)
961*89a07cf8Schristos break;
962*89a07cf8Schristos if (p >= ptr)
963*89a07cf8Schristos return i;
964*89a07cf8Schristos }
965*89a07cf8Schristos }
966*89a07cf8Schristos start = ptr;
967*89a07cf8Schristos }
968*89a07cf8Schristos return -1;
969*89a07cf8Schristos }
970*89a07cf8Schristos
contains_field(char c) const971*89a07cf8Schristos int reference::contains_field(char c) const
972*89a07cf8Schristos {
973*89a07cf8Schristos return field_index[(unsigned char)c] != NULL_FIELD_INDEX;
974*89a07cf8Schristos }
975*89a07cf8Schristos
classify()976*89a07cf8Schristos int reference::classify()
977*89a07cf8Schristos {
978*89a07cf8Schristos if (contains_field('J'))
979*89a07cf8Schristos return JOURNAL_ARTICLE;
980*89a07cf8Schristos if (contains_field('B'))
981*89a07cf8Schristos return ARTICLE_IN_BOOK;
982*89a07cf8Schristos if (contains_field('G'))
983*89a07cf8Schristos return TECH_REPORT;
984*89a07cf8Schristos if (contains_field('R'))
985*89a07cf8Schristos return TECH_REPORT;
986*89a07cf8Schristos if (contains_field('I'))
987*89a07cf8Schristos return BOOK;
988*89a07cf8Schristos if (contains_field('M'))
989*89a07cf8Schristos return BELL_TM;
990*89a07cf8Schristos return OTHER;
991*89a07cf8Schristos }
992*89a07cf8Schristos
get_year(const char ** endp) const993*89a07cf8Schristos const char *reference::get_year(const char **endp) const
994*89a07cf8Schristos {
995*89a07cf8Schristos if (field_index['D'] != NULL_FIELD_INDEX) {
996*89a07cf8Schristos string &date = field[field_index['D']];
997*89a07cf8Schristos const char *start = date.contents();
998*89a07cf8Schristos const char *end = start + date.length();
999*89a07cf8Schristos return find_year(start, end, endp);
1000*89a07cf8Schristos }
1001*89a07cf8Schristos else
1002*89a07cf8Schristos return 0;
1003*89a07cf8Schristos }
1004*89a07cf8Schristos
get_field(unsigned char c,const char ** endp) const1005*89a07cf8Schristos const char *reference::get_field(unsigned char c, const char **endp) const
1006*89a07cf8Schristos {
1007*89a07cf8Schristos if (field_index[c] != NULL_FIELD_INDEX) {
1008*89a07cf8Schristos string &f = field[field_index[c]];
1009*89a07cf8Schristos const char *start = f.contents();
1010*89a07cf8Schristos *endp = start + f.length();
1011*89a07cf8Schristos return start;
1012*89a07cf8Schristos }
1013*89a07cf8Schristos else
1014*89a07cf8Schristos return 0;
1015*89a07cf8Schristos }
1016*89a07cf8Schristos
get_date(const char ** endp) const1017*89a07cf8Schristos const char *reference::get_date(const char **endp) const
1018*89a07cf8Schristos {
1019*89a07cf8Schristos return get_field('D', endp);
1020*89a07cf8Schristos }
1021*89a07cf8Schristos
nth_field(int i,const char * start,const char ** endp)1022*89a07cf8Schristos const char *nth_field(int i, const char *start, const char **endp)
1023*89a07cf8Schristos {
1024*89a07cf8Schristos while (--i >= 0) {
1025*89a07cf8Schristos start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
1026*89a07cf8Schristos if (!start)
1027*89a07cf8Schristos return 0;
1028*89a07cf8Schristos start++;
1029*89a07cf8Schristos }
1030*89a07cf8Schristos const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
1031*89a07cf8Schristos if (e)
1032*89a07cf8Schristos *endp = e;
1033*89a07cf8Schristos return start;
1034*89a07cf8Schristos }
1035*89a07cf8Schristos
get_author(int i,const char ** endp) const1036*89a07cf8Schristos const char *reference::get_author(int i, const char **endp) const
1037*89a07cf8Schristos {
1038*89a07cf8Schristos for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
1039*89a07cf8Schristos const char *start = get_field(*f, endp);
1040*89a07cf8Schristos if (start) {
1041*89a07cf8Schristos if (strchr(MULTI_FIELD_NAMES, *f) != 0)
1042*89a07cf8Schristos return nth_field(i, start, endp);
1043*89a07cf8Schristos else if (i == 0)
1044*89a07cf8Schristos return start;
1045*89a07cf8Schristos else
1046*89a07cf8Schristos return 0;
1047*89a07cf8Schristos }
1048*89a07cf8Schristos }
1049*89a07cf8Schristos return 0;
1050*89a07cf8Schristos }
1051*89a07cf8Schristos
get_author_last_name(int i,const char ** endp) const1052*89a07cf8Schristos const char *reference::get_author_last_name(int i, const char **endp) const
1053*89a07cf8Schristos {
1054*89a07cf8Schristos for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
1055*89a07cf8Schristos const char *start = get_field(*f, endp);
1056*89a07cf8Schristos if (start) {
1057*89a07cf8Schristos if (strchr(MULTI_FIELD_NAMES, *f) != 0) {
1058*89a07cf8Schristos start = nth_field(i, start, endp);
1059*89a07cf8Schristos if (!start)
1060*89a07cf8Schristos return 0;
1061*89a07cf8Schristos }
1062*89a07cf8Schristos if (*f == 'A')
1063*89a07cf8Schristos return find_last_name(start, *endp, endp);
1064*89a07cf8Schristos else
1065*89a07cf8Schristos return start;
1066*89a07cf8Schristos }
1067*89a07cf8Schristos }
1068*89a07cf8Schristos return 0;
1069*89a07cf8Schristos }
1070*89a07cf8Schristos
set_date(string & d)1071*89a07cf8Schristos void reference::set_date(string &d)
1072*89a07cf8Schristos {
1073*89a07cf8Schristos if (d.length() == 0)
1074*89a07cf8Schristos delete_field('D');
1075*89a07cf8Schristos else
1076*89a07cf8Schristos insert_field('D', d);
1077*89a07cf8Schristos }
1078*89a07cf8Schristos
same_year(const reference & r1,const reference & r2)1079*89a07cf8Schristos int same_year(const reference &r1, const reference &r2)
1080*89a07cf8Schristos {
1081*89a07cf8Schristos const char *ye1;
1082*89a07cf8Schristos const char *ys1 = r1.get_year(&ye1);
1083*89a07cf8Schristos const char *ye2;
1084*89a07cf8Schristos const char *ys2 = r2.get_year(&ye2);
1085*89a07cf8Schristos if (ys1 == 0) {
1086*89a07cf8Schristos if (ys2 == 0)
1087*89a07cf8Schristos return same_date(r1, r2);
1088*89a07cf8Schristos else
1089*89a07cf8Schristos return 0;
1090*89a07cf8Schristos }
1091*89a07cf8Schristos else if (ys2 == 0)
1092*89a07cf8Schristos return 0;
1093*89a07cf8Schristos else if (ye1 - ys1 != ye2 - ys2)
1094*89a07cf8Schristos return 0;
1095*89a07cf8Schristos else
1096*89a07cf8Schristos return memcmp(ys1, ys2, ye1 - ys1) == 0;
1097*89a07cf8Schristos }
1098*89a07cf8Schristos
same_date(const reference & r1,const reference & r2)1099*89a07cf8Schristos int same_date(const reference &r1, const reference &r2)
1100*89a07cf8Schristos {
1101*89a07cf8Schristos const char *e1;
1102*89a07cf8Schristos const char *s1 = r1.get_date(&e1);
1103*89a07cf8Schristos const char *e2;
1104*89a07cf8Schristos const char *s2 = r2.get_date(&e2);
1105*89a07cf8Schristos if (s1 == 0)
1106*89a07cf8Schristos return s2 == 0;
1107*89a07cf8Schristos else if (s2 == 0)
1108*89a07cf8Schristos return 0;
1109*89a07cf8Schristos else if (e1 - s1 != e2 - s2)
1110*89a07cf8Schristos return 0;
1111*89a07cf8Schristos else
1112*89a07cf8Schristos return memcmp(s1, s2, e1 - s1) == 0;
1113*89a07cf8Schristos }
1114*89a07cf8Schristos
get_sort_field(int i,int si,int ssi,const char ** endp) const1115*89a07cf8Schristos const char *reference::get_sort_field(int i, int si, int ssi,
1116*89a07cf8Schristos const char **endp) const
1117*89a07cf8Schristos {
1118*89a07cf8Schristos const char *start = sort_key.contents();
1119*89a07cf8Schristos const char *end = start + sort_key.length();
1120*89a07cf8Schristos if (i < 0) {
1121*89a07cf8Schristos *endp = end;
1122*89a07cf8Schristos return start;
1123*89a07cf8Schristos }
1124*89a07cf8Schristos while (--i >= 0) {
1125*89a07cf8Schristos start = (char *)memchr(start, SORT_SEP, end - start);
1126*89a07cf8Schristos if (!start)
1127*89a07cf8Schristos return 0;
1128*89a07cf8Schristos start++;
1129*89a07cf8Schristos }
1130*89a07cf8Schristos const char *e = (char *)memchr(start, SORT_SEP, end - start);
1131*89a07cf8Schristos if (e)
1132*89a07cf8Schristos end = e;
1133*89a07cf8Schristos if (si < 0) {
1134*89a07cf8Schristos *endp = end;
1135*89a07cf8Schristos return start;
1136*89a07cf8Schristos }
1137*89a07cf8Schristos while (--si >= 0) {
1138*89a07cf8Schristos start = (char *)memchr(start, SORT_SUB_SEP, end - start);
1139*89a07cf8Schristos if (!start)
1140*89a07cf8Schristos return 0;
1141*89a07cf8Schristos start++;
1142*89a07cf8Schristos }
1143*89a07cf8Schristos e = (char *)memchr(start, SORT_SUB_SEP, end - start);
1144*89a07cf8Schristos if (e)
1145*89a07cf8Schristos end = e;
1146*89a07cf8Schristos if (ssi < 0) {
1147*89a07cf8Schristos *endp = end;
1148*89a07cf8Schristos return start;
1149*89a07cf8Schristos }
1150*89a07cf8Schristos while (--ssi >= 0) {
1151*89a07cf8Schristos start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
1152*89a07cf8Schristos if (!start)
1153*89a07cf8Schristos return 0;
1154*89a07cf8Schristos start++;
1155*89a07cf8Schristos }
1156*89a07cf8Schristos e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
1157*89a07cf8Schristos if (e)
1158*89a07cf8Schristos end = e;
1159*89a07cf8Schristos *endp = end;
1160*89a07cf8Schristos return start;
1161*89a07cf8Schristos }
1162*89a07cf8Schristos
1163