Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.dbf / src / main / java / org / gvsig / fmap / dal / store / dbf / utils / DbaseFileWriter.java @ 46893

History | View | Annotate | Download (18.2 KB)

1
/**
2
 * gvSIG. Desktop Geographic Information System.
3
 *
4
 * Copyright (C) 2007-2013 gvSIG Association.
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 3
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
 * MA  02110-1301, USA.
20
 *
21
 * For any additional information, do not hesitate to contact us
22
 * at info AT gvsig.com, or visit our website www.gvsig.com.
23
 */
24
package org.gvsig.fmap.dal.store.dbf.utils;
25

    
26
import java.io.IOException;
27
import java.math.BigDecimal;
28
import java.nio.Buffer;
29
import java.nio.BufferOverflowException;
30
import java.nio.ByteBuffer;
31
import java.nio.MappedByteBuffer;
32
import java.nio.channels.FileChannel;
33
import java.nio.charset.Charset;
34
import java.util.Date;
35
import java.util.Iterator;
36
import org.apache.commons.lang3.StringUtils;
37

    
38
import org.gvsig.fmap.dal.DataTypes;
39
import org.gvsig.fmap.dal.exception.CloseException;
40
import org.gvsig.fmap.dal.exception.InitializeException;
41
import org.gvsig.fmap.dal.exception.UnsupportedEncodingException;
42
import org.gvsig.fmap.dal.exception.WriteException;
43
import org.gvsig.fmap.dal.feature.Feature;
44
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor;
45
import org.gvsig.fmap.dal.feature.FeatureType;
46

    
47
/**
 * A DbaseFileWriter is used to write a dbase III format file. The general use
 * of this class is: <CODE><PRE>
 * DbaseFileHeader header = ...
 * FileChannel out = new FileOutputStream("thefile.dbf").getChannel();
 * DbaseFileWriter w = new DbaseFileWriter(header, out, true);
 * while ( moreRecords ) {
 *   w.append( getMyFeature() );
 * }
 * w.close();
 * </PRE></CODE> You must supply the <CODE>moreRecords</CODE> and
 * <CODE>getMyFeature()</CODE> logic...
 *
 * @author Ian Schneider
 */
62
public class DbaseFileWriter {
63

    
64
    private final DbaseFileHeader header;
65
    private FieldFormatter formatter = new FieldFormatter();
66
    private FileChannel channel;
67
    private ByteBuffer buffer;
68
    private boolean headDrity = false;
69
    private ByteBuffer blank;
70
    private int blankSize;
71

    
72
    private Charset charset;
73

    
74
    /**
75
     * Create a DbaseFileWriter using the specified header and writing to the
76
     * given channel.
77
     *
78
     * @param header The DbaseFileHeader to write.
79
     * @param out The Channel to write to.
80
     * @param isNew
81
     * @throws org.gvsig.fmap.dal.exception.InitializeException
82
     *
83
     *
84
     */
85
    public DbaseFileWriter(DbaseFileHeader header, FileChannel out,
86
            boolean isNew) throws InitializeException {
87
        this.header = header;
88
        this.channel = out;
89
        this.headDrity = isNew;
90
        this.setCharset(Charset.forName(header.mappingEncoding(header.getCharsetName())));
91

    
92
        init();
93
    }
94

    
95
    private void init() throws InitializeException {
96
        try {
97
            if (this.channel.size() < this.header.getHeaderLength()) {
98
                this.writeHeader();
99
            }
100
            buffer = ByteBuffer.allocateDirect(header.getRecordLength());
101
        } catch (Exception e) {
102
            throw new InitializeException("DBF Writer", e);
103
        }
104
    }
105

    
106
    private void write() throws WriteException {
107
        ((Buffer) buffer).position(0);
108
        int r = buffer.remaining();
109
        try {
110
            while ((r -= channel.write(buffer)) > 0) {
111
                // do nothing
112
            }
113
        } catch (IOException e) {
114
            throw new WriteException("DBF Writer", e);
115
        }
116
    }
117

    
118
    private void writeHeader() throws WriteException {
119
        try {
120
            channel.position(0);
121
            header.write(channel);
122
        } catch (IOException e) {
123
            throw new WriteException("DBF Writer", e);
124
        }
125
    }
126

    
127
    /**
128
     * Write a single dbase record.
129
     *
130
     * @param feature
131
     * @throws UnsupportedEncodingException
132
     * @throws WriteException
133
     */
134
    public void append(Feature feature) throws WriteException,
135
            UnsupportedEncodingException {
136
        this.fillBuffer(feature);
137
        try {
138
            this.moveToEOF();
139
        } catch (IOException e) {
140
            throw new WriteException("DbaseFileWriter", e);
141
        }
142
        this.header.setNumRecords(this.header.getNumRecords() + 1);
143
        write();
144

    
145
        this.headDrity = true;
146
    }
147

    
148
    private void fillBuffer(Feature feature)
149
            throws UnsupportedEncodingException, WriteException {
150
        FeatureType featureType = feature.getType();
151
        try {
152
            ((Buffer) buffer).position(0);
153

    
154
            // put the 'not-deleted' marker
155
            buffer.put((byte) ' ');
156

    
157
            @SuppressWarnings("unchecked")
158
            Iterator<FeatureAttributeDescriptor> iterator
159
                    = featureType.iterator();
160

    
161
            while (iterator.hasNext()) {
162
                FeatureAttributeDescriptor fad = iterator.next();
163
                if (fad.isComputed()) {
164
                    continue;
165
                }
166

    
167
                if (fad.getName().length() > DbaseFile.MAX_FIELD_NAME_LENGTH) {
168
                    throw new FieldNameTooLongException(
169
                            "DBF file", fad.getName());
170
                }
171

    
172
                int type = fad.getType();
173
                if (type == DataTypes.GEOMETRY) {
174
                    continue;
175
                }
176
                encodeField(fad, feature);
177
            }
178
        } catch (Exception e) {
179
            throw new WriteException("DbaseFileWriter", e);
180
        }
181
    }
182

    
183
    private void moveToEOF() throws IOException {
184
        this.moveTo(this.header.getNumRecords());
185
    }
186

    
187
    private void moveTo(long numReg) throws IOException {
188
        // if (!(channel instanceof FileChannel)) {
189
        // throw new IOException(
190
        // "DbaseFileWriterNIO: channel is not a FileChannel. Cannot position properly");
191
        // }
192

    
193
        long newPos
194
                = header.getHeaderLength() + numReg * header.getRecordLength();
195
        if (this.channel.position() != newPos) {
196
            this.channel.position(newPos);
197
        }
198
    }
199

    
200
    /**
201
     * Write a single dbase record. Useful to update a dbf.
202
     *
203
     * @param feature
204
     * @param numReg
205
     * @throws WriteException
206
     * @throws UnsupportedEncodingException
207
     */
208
    public void update(Feature feature, long numReg) throws WriteException,
209
            UnsupportedEncodingException {
210
        this.fillBuffer(feature);
211

    
212
        try {
213
            this.moveTo(numReg);
214
        } catch (IOException e) {
215
            throw new WriteException("DbaseFileWriter", e);
216
        }
217

    
218
        write();
219
    }
220

    
221
    private void encodeField(FeatureAttributeDescriptor attr, Feature feature) throws java.io.UnsupportedEncodingException, UnsupportedEncodingException {
222
        if (attr == null) {
223
            throw new NullPointerException("attr is NULL");
224
        }
225
        if (feature == null) {
226
            throw new NullPointerException("feature is NULL");
227
        }
228
        try {
229
            DbaseFieldDescriptor descriptor = this.header.getFieldDescription(attr.getName());
230

    
231
            int type = attr.getType();
232
            final int fieldLen = descriptor.getSize();
233
            String fieldString;
234

    
235
//        if( buffer.position()!=descriptor.getOffsetInRecord() ) {
236
//            throw new RuntimeException("Encoding field '"+descriptor.getName()+"' found an incorrect offset.");
237
//        }
238
            if (feature.isNull(attr.getIndex())) {
239
                safeEncode(" ", fieldLen, false);
240
                return;
241
            }
242
            if (DataTypes.BOOLEAN == type) {
243
                boolean b = feature.getBoolean(attr.getIndex());
244
                safeEncode(b ? "T" : "F", 1, true);
245

    
246
            } else if (DataTypes.TIME == type) {
247
                Date date = feature.getTime(attr.getIndex());
248
                fieldString = formatter.formatTime(date);
249
                safeEncode(fieldString, fieldLen, false);
250

    
251
            } else if (DataTypes.TIMESTAMP == type) {
252
                Date date = feature.getTimestamp(attr.getIndex());
253
                fieldString = formatter.formatTimestamp(date);
254
                safeEncode(fieldString, fieldLen, false);
255

    
256
            } else if (DataTypes.DATE == type) {
257
                Date date = feature.getDate(attr.getIndex());
258
                fieldString = formatter.formatDate(date);
259
                safeEncode(fieldString, fieldLen, false);
260

    
261
            } else if (DataTypes.DECIMAL == type) {
262
                BigDecimal n = feature.getDecimal(attr.getIndex());
263
                fieldString = formatter.format(n, fieldLen);
264
                safeEncode(fieldString, fieldLen, false);
265

    
266
            } else if (DataTypes.DOUBLE == type) {
267
                double n = feature.getDouble(attr.getIndex());
268
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
269
                safeEncode(fieldString, fieldLen, false);
270

    
271
            } else if (DataTypes.FLOAT == type) {
272
                float n = feature.getFloat(attr.getIndex());
273
                fieldString = formatter.format(n, fieldLen, descriptor.getScale());
274
                safeEncode(fieldString, fieldLen, false);
275

    
276
            } else if (DataTypes.LONG == type) {
277
                long l = feature.getLong(attr.getIndex());
278
                fieldString = formatter.format(l, fieldLen);
279
                safeEncode(fieldString, fieldLen, false);
280

    
281
            } else if (DataTypes.INT == type) {
282
                int n = feature.getInt(attr.getIndex());
283
                fieldString = formatter.format(n, fieldLen);
284
                safeEncode(fieldString, fieldLen, false);
285

    
286
            } else if (DataTypes.BYTE == type) {
287
                int n = feature.getInt(attr.getIndex());
288
                fieldString = formatter.format(n, fieldLen);
289
                safeEncode(fieldString, fieldLen, false);
290

    
291
            } else if (DataTypes.STRING == type) {
292
                String s = feature.getString(attr.getIndex());
293
                safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true);
294

    
295
            } else {
296
                // Si no conocemos el tipo intentamos guardarlo como un string
297
                String s = feature.getString(attr.getIndex());
298
                safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true);
299

    
300
            }
301
        } catch (Exception ex) {
302
            throw new RuntimeException("Can't encode field '" + attr.getName() + "'", ex);
303
        }
304

    
305
    }
306

    
307
    /**
308
     * Returns a safely padded (and potentially truncated) string
309
     *
310
     * This may truncate some record, but it is required to ensure that the
311
     * field limit is not overflowed when using variable-length charsets such as
312
     * UTF-8.
313
     *
314
     * @throws UnsupportedEncodingException
315
     */
316
    private void safeEncode(String in, int limit, boolean rightPadding) throws UnsupportedEncodingException {
317
        try {
318
            byte[] encodedString = in.getBytes(this.charset);
319
            if (encodedString.length > limit) {
320
                // too long, truncating
321
                /*
322
                             * The block code bellow is equivalent to this simple code
323
                             * fragment:
324

325
                    if (rightPadding) {
326
                            in = in.substring(0, in.length()-1);
327
                            encodedString = in.getBytes(charset);
328
                    }
329
                    else {
330
                            in.substring(1, in.length());
331
                            encodedString = in.getBytes(charset);
332
                    }
333

334
                    However, the implemented algorithm has a much better performance
335
                    for the average and worst cases (when the input string has a lot
336
                    of multibyte characters), while keeping a good performance
337
                    for the best case (when all the characters in the input string
338
                    can be represented as single bytes using the selected charset).
339

340
                    The general strategy is to compute the deviation from the
341
                    required maximum number of bytes (limit) and the actual number
342
                    of bytes of the encoded String.
343

344
                    Then, we use this deviation to estimate the amount of characters
345
                    to truncate, based on the average factor of bytes per char in the
346
                    input string.
347

348
                    We truncate the string using this approach until the deviation
349
                    gets stable.
350

351
                    Finally, as we should be close enough to the right truncation position,
352
                    we increment/decrement the truncated string by only 1 character, to
353
                    ensure we truncate in the exact position. 
354
                 */
355
                String str = in;
356
                int estimatedDiff, deviation;
357
                int deviationPrev;
358
                double ratio;
359
                byte[] encodedChar;
360
                int truncatePos = 0;
361
                deviation = encodedString.length - limit;
362
                deviationPrev = deviation - 1;
363
                while (Math.abs(deviation) > Math.abs(deviationPrev) && str.length() > 0) {
364
                    ratio = ((double) encodedString.length) / ((double) str.length());
365
                    // apply the estimated diff, ensuring it is at least >= 1.0 in absolute value
366
                    estimatedDiff = Math.max((int) (((double) deviation) / ratio), (int) (Math.signum(deviation) * 1));
367
                    // too long, truncating
368
                    if (rightPadding) {
369
                        truncatePos = Math.max(str.length() - estimatedDiff, 0);
370
                        str = in.substring(0, truncatePos);
371
                    } else {
372
                        truncatePos = Math.max(truncatePos + estimatedDiff, 0);
373
                        str = in.substring(truncatePos);
374
                    }
375
                    encodedString = str.getBytes(charset);
376
                    deviationPrev = deviation;
377
                    deviation = encodedString.length - limit;
378
                }
379
                // now we are close enough, get the exact position for truncating
380
                while (encodedString.length > limit) {
381
                    // too long, truncating
382
                    //                                      System.out.println("truncating");
383
                    if (rightPadding) {
384
                        str = in.substring(0, str.length() - 1);
385
                    } else {
386
                        truncatePos = truncatePos + 1;
387
                        str = in.substring(truncatePos);
388
                    }
389
                    encodedString = str.getBytes(charset);
390
                }
391
                while (encodedString.length < limit && str.length() < in.length()) {
392
                    // Extend if necessary:
393
                    // 1 - Get the length in bytes of the next char
394
                    // 2 - Add the char to the substring if we are still within the limits 
395
                    //                                      System.out.println("extending");
396
                    if (rightPadding) {
397
                        encodedChar = in.substring(str.length(), str.length() + 1).getBytes(charset);
398
                    } else {
399
                        encodedChar = in.substring(truncatePos - 1, truncatePos).getBytes(charset);
400
                        //                                              System.out.println(encodedChar);
401
                        //                                              System.out.println(encodedChar.length);
402
                        //                                              System.out.println(testStrings[i].substring(truncatePos-1, truncatePos));
403
                    }
404
                    //                                      System.out.println(testStrings[i].substring(in.length(), in.length()+1));
405
                    if ((encodedString.length + encodedChar.length) > limit) {
406
                        // one more char would overflow the limit
407
                        break;
408
                    }
409
                    // too short, extending
410
                    if (rightPadding) {
411
                        str = in.substring(0, str.length() + 1);
412
                    } else {
413
                        truncatePos = truncatePos - 1;
414
                        str = in.substring(truncatePos);
415
                    }
416
                    encodedString = str.getBytes(charset);
417
                }
418
            }
419
            if (rightPadding) {
420
                buffer.put(encodedString);
421
            }
422
            if (encodedString.length < limit) {
423
                // too short, padding
424
                int i = encodedString.length;
425
                while (i < limit) {
426
                    ((Buffer) blank).position(0);
427
                    buffer.put(blank);
428
                    i = i + blankSize;
429
                }
430
                if (i > limit) {
431
                    // Might happen for instance if charset is UTF16 and the
432
                    // limit of characters in the field is an odd number
433
                    throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset"));
434
                }
435
            }
436
            if (!rightPadding) {
437
                buffer.put(encodedString);
438
            }
439
        } catch (BufferOverflowException exc) {
440
            // Might happen for instance if charset is UTF16 and the
441
            // limit of characters in the field is an odd number
442
            throw new UnsupportedEncodingException(exc);
443
        }
444
    }
445

    
446

    
447
    /**
448
     * Release resources associated with this writer. <B>Highly recommended</B>
449
     *
450
     * @throws CloseException
451
     */
452
    public void close() throws CloseException {
453
        // IANS - GEOT 193, bogus 0x00 written. According to dbf spec, optional
454
        // eof 0x1a marker is, well, optional. Since the original code wrote a
455
        // 0x00 (which is wrong anyway) lets just do away with this :)
456
        // - produced dbf works in OpenOffice and ArcExplorer java, so it must
457
        // be okay.
458
        // buffer.position(0);
459
        // buffer.put((byte) 0).position(0).limit(1);
460
        // write();
461

    
462
        if (headDrity) {
463
            try {
464
                this.writeHeader();
465
            } catch (WriteException e) {
466
                throw new CloseException("DbaseFileWriter", e);
467
            }
468
        }
469

    
470
        try {
471
            channel.close();
472
        } catch (IOException e) {
473
            throw new CloseException("DBF Writer", e);
474
        }
475
        if (buffer instanceof MappedByteBuffer) {
476
            // NIOUtilities.clean(buffer);
477
        }
478

    
479
        buffer = null;
480
        channel = null;
481
        formatter = null;
482
    }
483

    
484
    public void setCharset(Charset charset) {
485
        this.charset = charset;
486
        blank = charset.encode(" ");
487
        blankSize = blank.limit();
488
    }
489

    
490
}