svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.dbf / src / main / java / org / gvsig / fmap / dal / store / dbf / utils / DbaseFileWriter.java @ 46893
History | View | Annotate | Download (18.2 KB)
1 |
/**
|
---|---|
2 |
* gvSIG. Desktop Geographic Information System.
|
3 |
*
|
4 |
* Copyright (C) 2007-2013 gvSIG Association.
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 3
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
* For any additional information, do not hesitate to contact us
|
22 |
* at info AT gvsig.com, or visit our website www.gvsig.com.
|
23 |
*/
|
24 |
package org.gvsig.fmap.dal.store.dbf.utils; |
25 |
|
26 |
import java.io.IOException; |
27 |
import java.math.BigDecimal; |
28 |
import java.nio.Buffer; |
29 |
import java.nio.BufferOverflowException; |
30 |
import java.nio.ByteBuffer; |
31 |
import java.nio.MappedByteBuffer; |
32 |
import java.nio.channels.FileChannel; |
33 |
import java.nio.charset.Charset; |
34 |
import java.util.Date; |
35 |
import java.util.Iterator; |
36 |
import org.apache.commons.lang3.StringUtils; |
37 |
|
38 |
import org.gvsig.fmap.dal.DataTypes; |
39 |
import org.gvsig.fmap.dal.exception.CloseException; |
40 |
import org.gvsig.fmap.dal.exception.InitializeException; |
41 |
import org.gvsig.fmap.dal.exception.UnsupportedEncodingException; |
42 |
import org.gvsig.fmap.dal.exception.WriteException; |
43 |
import org.gvsig.fmap.dal.feature.Feature; |
44 |
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor; |
45 |
import org.gvsig.fmap.dal.feature.FeatureType; |
46 |
|
47 |
/**
 * A DbaseFileWriter is used to write a dbase III format file. The general
 * use of this class is: <CODE><PRE>
 * DbaseFileHeader header = ...
 * WritableFileChannel out = new FileOutputStream("thefile.dbf").getChannel();
 * DbaseFileWriter w = new DbaseFileWriter(header,out);
 * while ( moreRecords ) {
 *   w.write( getMyRecord() );
 * }
 * w.close();
 * </PRE></CODE> You must supply the <CODE>moreRecords</CODE> and
 * <CODE>getMyRecord()</CODE> logic...
 *
 * @author Ian Schneider
 */
62 |
public class DbaseFileWriter {

    // Description of the DBF structure: field layout, record length, record count.
    private final DbaseFileHeader header;
    // Formats numbers/dates as fixed-width DBF field strings.
    private FieldFormatter formatter = new FieldFormatter();
    // Destination channel; repositioned per record before each write.
    private FileChannel channel;
    // Scratch buffer holding exactly one record (header.getRecordLength() bytes).
    private ByteBuffer buffer;
    // NOTE(review): name is a typo for "headDirty" — true when the header must
    // be rewritten on close() (set on construction of new files and on append).
    private boolean headDrity = false;
    // Pre-encoded single space in the active charset, used to pad short fields.
    private ByteBuffer blank;
    // Byte length of 'blank' in the active charset (e.g. 1 for UTF-8).
    private int blankSize;

    private Charset charset;
74 |
/**
|
75 |
* Create a DbaseFileWriter using the specified header and writing to the
|
76 |
* given channel.
|
77 |
*
|
78 |
* @param header The DbaseFileHeader to write.
|
79 |
* @param out The Channel to write to.
|
80 |
* @param isNew
|
81 |
* @throws org.gvsig.fmap.dal.exception.InitializeException
|
82 |
*
|
83 |
*
|
84 |
*/
|
85 |
public DbaseFileWriter(DbaseFileHeader header, FileChannel out, |
86 |
boolean isNew) throws InitializeException { |
87 |
this.header = header;
|
88 |
this.channel = out;
|
89 |
this.headDrity = isNew;
|
90 |
this.setCharset(Charset.forName(header.mappingEncoding(header.getCharsetName()))); |
91 |
|
92 |
init(); |
93 |
} |
94 |
|
95 |
private void init() throws InitializeException { |
96 |
try {
|
97 |
if (this.channel.size() < this.header.getHeaderLength()) { |
98 |
this.writeHeader();
|
99 |
} |
100 |
buffer = ByteBuffer.allocateDirect(header.getRecordLength());
|
101 |
} catch (Exception e) { |
102 |
throw new InitializeException("DBF Writer", e); |
103 |
} |
104 |
} |
105 |
|
106 |
private void write() throws WriteException { |
107 |
((Buffer) buffer).position(0); |
108 |
int r = buffer.remaining();
|
109 |
try {
|
110 |
while ((r -= channel.write(buffer)) > 0) { |
111 |
// do nothing
|
112 |
} |
113 |
} catch (IOException e) { |
114 |
throw new WriteException("DBF Writer", e); |
115 |
} |
116 |
} |
117 |
|
118 |
private void writeHeader() throws WriteException { |
119 |
try {
|
120 |
channel.position(0);
|
121 |
header.write(channel); |
122 |
} catch (IOException e) { |
123 |
throw new WriteException("DBF Writer", e); |
124 |
} |
125 |
} |
126 |
|
127 |
/**
|
128 |
* Write a single dbase record.
|
129 |
*
|
130 |
* @param feature
|
131 |
* @throws UnsupportedEncodingException
|
132 |
* @throws WriteException
|
133 |
*/
|
134 |
public void append(Feature feature) throws WriteException, |
135 |
UnsupportedEncodingException {
|
136 |
this.fillBuffer(feature);
|
137 |
try {
|
138 |
this.moveToEOF();
|
139 |
} catch (IOException e) { |
140 |
throw new WriteException("DbaseFileWriter", e); |
141 |
} |
142 |
this.header.setNumRecords(this.header.getNumRecords() + 1); |
143 |
write(); |
144 |
|
145 |
this.headDrity = true; |
146 |
} |
147 |
|
148 |
private void fillBuffer(Feature feature) |
149 |
throws UnsupportedEncodingException, WriteException { |
150 |
FeatureType featureType = feature.getType(); |
151 |
try {
|
152 |
((Buffer) buffer).position(0); |
153 |
|
154 |
// put the 'not-deleted' marker
|
155 |
buffer.put((byte) ' '); |
156 |
|
157 |
@SuppressWarnings("unchecked") |
158 |
Iterator<FeatureAttributeDescriptor> iterator
|
159 |
= featureType.iterator(); |
160 |
|
161 |
while (iterator.hasNext()) {
|
162 |
FeatureAttributeDescriptor fad = iterator.next(); |
163 |
if (fad.isComputed()) {
|
164 |
continue;
|
165 |
} |
166 |
|
167 |
if (fad.getName().length() > DbaseFile.MAX_FIELD_NAME_LENGTH) {
|
168 |
throw new FieldNameTooLongException( |
169 |
"DBF file", fad.getName());
|
170 |
} |
171 |
|
172 |
int type = fad.getType();
|
173 |
if (type == DataTypes.GEOMETRY) {
|
174 |
continue;
|
175 |
} |
176 |
encodeField(fad, feature); |
177 |
} |
178 |
} catch (Exception e) { |
179 |
throw new WriteException("DbaseFileWriter", e); |
180 |
} |
181 |
} |
182 |
|
183 |
private void moveToEOF() throws IOException { |
184 |
this.moveTo(this.header.getNumRecords()); |
185 |
} |
186 |
|
187 |
private void moveTo(long numReg) throws IOException { |
188 |
// if (!(channel instanceof FileChannel)) {
|
189 |
// throw new IOException(
|
190 |
// "DbaseFileWriterNIO: channel is not a FileChannel. Cannot position properly");
|
191 |
// }
|
192 |
|
193 |
long newPos
|
194 |
= header.getHeaderLength() + numReg * header.getRecordLength(); |
195 |
if (this.channel.position() != newPos) { |
196 |
this.channel.position(newPos);
|
197 |
} |
198 |
} |
199 |
|
200 |
/**
|
201 |
* Write a single dbase record. Useful to update a dbf.
|
202 |
*
|
203 |
* @param feature
|
204 |
* @param numReg
|
205 |
* @throws WriteException
|
206 |
* @throws UnsupportedEncodingException
|
207 |
*/
|
208 |
public void update(Feature feature, long numReg) throws WriteException, |
209 |
UnsupportedEncodingException {
|
210 |
this.fillBuffer(feature);
|
211 |
|
212 |
try {
|
213 |
this.moveTo(numReg);
|
214 |
} catch (IOException e) { |
215 |
throw new WriteException("DbaseFileWriter", e); |
216 |
} |
217 |
|
218 |
write(); |
219 |
} |
220 |
|
221 |
private void encodeField(FeatureAttributeDescriptor attr, Feature feature) throws java.io.UnsupportedEncodingException, UnsupportedEncodingException { |
222 |
if (attr == null) { |
223 |
throw new NullPointerException("attr is NULL"); |
224 |
} |
225 |
if (feature == null) { |
226 |
throw new NullPointerException("feature is NULL"); |
227 |
} |
228 |
try {
|
229 |
DbaseFieldDescriptor descriptor = this.header.getFieldDescription(attr.getName());
|
230 |
|
231 |
int type = attr.getType();
|
232 |
final int fieldLen = descriptor.getSize(); |
233 |
String fieldString;
|
234 |
|
235 |
// if( buffer.position()!=descriptor.getOffsetInRecord() ) {
|
236 |
// throw new RuntimeException("Encoding field '"+descriptor.getName()+"' found an incorrect offset.");
|
237 |
// }
|
238 |
if (feature.isNull(attr.getIndex())) {
|
239 |
safeEncode(" ", fieldLen, false); |
240 |
return;
|
241 |
} |
242 |
if (DataTypes.BOOLEAN == type) {
|
243 |
boolean b = feature.getBoolean(attr.getIndex());
|
244 |
safeEncode(b ? "T" : "F", 1, true); |
245 |
|
246 |
} else if (DataTypes.TIME == type) { |
247 |
Date date = feature.getTime(attr.getIndex());
|
248 |
fieldString = formatter.formatTime(date); |
249 |
safeEncode(fieldString, fieldLen, false);
|
250 |
|
251 |
} else if (DataTypes.TIMESTAMP == type) { |
252 |
Date date = feature.getTimestamp(attr.getIndex());
|
253 |
fieldString = formatter.formatTimestamp(date); |
254 |
safeEncode(fieldString, fieldLen, false);
|
255 |
|
256 |
} else if (DataTypes.DATE == type) { |
257 |
Date date = feature.getDate(attr.getIndex());
|
258 |
fieldString = formatter.formatDate(date); |
259 |
safeEncode(fieldString, fieldLen, false);
|
260 |
|
261 |
} else if (DataTypes.DECIMAL == type) { |
262 |
BigDecimal n = feature.getDecimal(attr.getIndex());
|
263 |
fieldString = formatter.format(n, fieldLen); |
264 |
safeEncode(fieldString, fieldLen, false);
|
265 |
|
266 |
} else if (DataTypes.DOUBLE == type) { |
267 |
double n = feature.getDouble(attr.getIndex());
|
268 |
fieldString = formatter.format(n, fieldLen, descriptor.getScale()); |
269 |
safeEncode(fieldString, fieldLen, false);
|
270 |
|
271 |
} else if (DataTypes.FLOAT == type) { |
272 |
float n = feature.getFloat(attr.getIndex());
|
273 |
fieldString = formatter.format(n, fieldLen, descriptor.getScale()); |
274 |
safeEncode(fieldString, fieldLen, false);
|
275 |
|
276 |
} else if (DataTypes.LONG == type) { |
277 |
long l = feature.getLong(attr.getIndex());
|
278 |
fieldString = formatter.format(l, fieldLen); |
279 |
safeEncode(fieldString, fieldLen, false);
|
280 |
|
281 |
} else if (DataTypes.INT == type) { |
282 |
int n = feature.getInt(attr.getIndex());
|
283 |
fieldString = formatter.format(n, fieldLen); |
284 |
safeEncode(fieldString, fieldLen, false);
|
285 |
|
286 |
} else if (DataTypes.BYTE == type) { |
287 |
int n = feature.getInt(attr.getIndex());
|
288 |
fieldString = formatter.format(n, fieldLen); |
289 |
safeEncode(fieldString, fieldLen, false);
|
290 |
|
291 |
} else if (DataTypes.STRING == type) { |
292 |
String s = feature.getString(attr.getIndex());
|
293 |
safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true); |
294 |
|
295 |
} else {
|
296 |
// Si no conocemos el tipo intentamos guardarlo como un string
|
297 |
String s = feature.getString(attr.getIndex());
|
298 |
safeEncode(StringUtils.defaultIfEmpty(s, ""), fieldLen, true); |
299 |
|
300 |
} |
301 |
} catch (Exception ex) { |
302 |
throw new RuntimeException("Can't encode field '" + attr.getName() + "'", ex); |
303 |
} |
304 |
|
305 |
} |
306 |
|
307 |
/**
 * Encodes a string into the record buffer, safely padded (and potentially
 * truncated) to exactly {@code limit} bytes.
 *
 * This may truncate some record, but it is required to ensure the field
 * limit is not overflowed when using variable-length charsets such as
 * UTF-8, where the byte length of a string is not its character length.
 *
 * @param in the value to encode
 * @param limit the exact field width in bytes
 * @param rightPadding true to keep the left of the string and pad/truncate
 *        on the right (text fields); false to keep the right (numerics)
 * @throws UnsupportedEncodingException if the value cannot be fitted
 */
private void safeEncode(String in, int limit, boolean rightPadding) throws UnsupportedEncodingException {
    try {
        byte[] encodedString = in.getBytes(this.charset);
        if (encodedString.length > limit) {
            // Too long: truncate. The code below is equivalent to repeatedly
            // dropping one character (from the right when rightPadding, else
            // from the left) and re-encoding until the bytes fit, but it
            // performs much better when the input has many multibyte
            // characters. Strategy: (1) estimate how many characters to drop
            // from the byte-deviation and the average bytes-per-char ratio,
            // iterating while the deviation keeps shrinking; (2) fine-tune
            // one character at a time to land exactly on the limit.
            String str = in;
            int estimatedDiff, deviation;
            int deviationPrev;
            double ratio;
            byte[] encodedChar;
            // truncatePos: for left-truncation, the index of the first kept
            // character; for right-truncation, the end of the kept prefix.
            int truncatePos = 0;
            deviation = encodedString.length - limit;
            // Seed so the loop runs at least once (|dev| > |dev - 1| here).
            deviationPrev = deviation - 1;
            while (Math.abs(deviation) > Math.abs(deviationPrev) && str.length() > 0) {
                ratio = ((double) encodedString.length) / ((double) str.length());
                // apply the estimated diff, ensuring it is at least >= 1.0 in absolute value
                estimatedDiff = Math.max((int) (((double) deviation) / ratio), (int) (Math.signum(deviation) * 1));
                // too long, truncating
                if (rightPadding) {
                    truncatePos = Math.max(str.length() - estimatedDiff, 0);
                    str = in.substring(0, truncatePos);
                } else {
                    truncatePos = Math.max(truncatePos + estimatedDiff, 0);
                    str = in.substring(truncatePos);
                }
                encodedString = str.getBytes(charset);
                deviationPrev = deviation;
                deviation = encodedString.length - limit;
            }
            // Now we are close enough: shrink one character at a time until
            // the encoded bytes fit within the limit.
            while (encodedString.length > limit) {
                if (rightPadding) {
                    str = in.substring(0, str.length() - 1);
                } else {
                    truncatePos = truncatePos + 1;
                    str = in.substring(truncatePos);
                }
                encodedString = str.getBytes(charset);
            }
            // The estimate may have overshot: grow back one character at a
            // time while the next character still fits within the limit.
            while (encodedString.length < limit && str.length() < in.length()) {
                // 1 - Get the byte length of the next candidate char.
                // 2 - Add it to the substring only if still within the limit.
                if (rightPadding) {
                    encodedChar = in.substring(str.length(), str.length() + 1).getBytes(charset);
                } else {
                    encodedChar = in.substring(truncatePos - 1, truncatePos).getBytes(charset);
                }
                if ((encodedString.length + encodedChar.length) > limit) {
                    // one more char would overflow the limit
                    break;
                }
                // too short, extending
                if (rightPadding) {
                    str = in.substring(0, str.length() + 1);
                } else {
                    truncatePos = truncatePos - 1;
                    str = in.substring(truncatePos);
                }
                encodedString = str.getBytes(charset);
            }
        }
        // Text fields: value first, then right-pad with encoded blanks.
        if (rightPadding) {
            buffer.put(encodedString);
        }
        if (encodedString.length < limit) {
            // too short, padding with pre-encoded blanks (blankSize bytes each)
            int i = encodedString.length;
            while (i < limit) {
                ((Buffer) blank).position(0);
                buffer.put(blank);
                i = i + blankSize;
            }
            if (i > limit) {
                // Might happen for instance if charset is UTF16 and the
                // limit of characters in the field is an odd number
                throw new UnsupportedEncodingException(new Exception("Impossible to encode this DBF using the selected charset"));
            }
        }
        // Numeric fields: pad first, value last (right-aligned).
        if (!rightPadding) {
            buffer.put(encodedString);
        }
    } catch (BufferOverflowException exc) {
        // Might happen for instance if charset is UTF16 and the
        // limit of characters in the field is an odd number
        throw new UnsupportedEncodingException(exc);
    }
}
445 |
|
446 |
|
447 |
/**
|
448 |
* Release resources associated with this writer. <B>Highly recommended</B>
|
449 |
*
|
450 |
* @throws CloseException
|
451 |
*/
|
452 |
public void close() throws CloseException { |
453 |
// IANS - GEOT 193, bogus 0x00 written. According to dbf spec, optional
|
454 |
// eof 0x1a marker is, well, optional. Since the original code wrote a
|
455 |
// 0x00 (which is wrong anyway) lets just do away with this :)
|
456 |
// - produced dbf works in OpenOffice and ArcExplorer java, so it must
|
457 |
// be okay.
|
458 |
// buffer.position(0);
|
459 |
// buffer.put((byte) 0).position(0).limit(1);
|
460 |
// write();
|
461 |
|
462 |
if (headDrity) {
|
463 |
try {
|
464 |
this.writeHeader();
|
465 |
} catch (WriteException e) {
|
466 |
throw new CloseException("DbaseFileWriter", e); |
467 |
} |
468 |
} |
469 |
|
470 |
try {
|
471 |
channel.close(); |
472 |
} catch (IOException e) { |
473 |
throw new CloseException("DBF Writer", e); |
474 |
} |
475 |
if (buffer instanceof MappedByteBuffer) { |
476 |
// NIOUtilities.clean(buffer);
|
477 |
} |
478 |
|
479 |
buffer = null;
|
480 |
channel = null;
|
481 |
formatter = null;
|
482 |
} |
483 |
|
484 |
public void setCharset(Charset charset) { |
485 |
this.charset = charset;
|
486 |
blank = charset.encode(" ");
|
487 |
blankSize = blank.limit(); |
488 |
} |
489 |
|
490 |
} |