45 #include "EST_types.h"
46 #include "EST_Track.h"
47 #include "EST_track_aux.h"
48 #include "EST_TrackMap.h"
49 #include "EST_cutils.h"
50 #include "EST_Token.h"
51 #include "EST_TList.h"
52 #include "EST_string_aux.h"
53 #include "EST_walloc.h"
54 #include "EST_TrackFile.h"
55 #include "EST_FileType.h"
56 #include "EST_WaveFile.h"
57 #include "EST_wave_utils.h"
62 #define NEARLY_ZERO 0.00001
64 #define REASONABLE_FRAME_SIZE (20)
65 #define UNREASONABLE_FRAME_SIZE (80)
68 static const char *NIST_SIG =
"NIST_1A\n 1024\n";
69 static const char *NIST_END_SIG =
"end_head\n";
70 #define NIST_HDR_SIZE 1024
72 static int def_load_sample_rate = 500;
78 int nist_get_param_int(
char *hdr,
char *field,
int def_val);
79 char *nist_get_param_str(
char *hdr,
char *field,
char *def_val);
80 const char *sample_type_to_nist(
enum EST_sample_type_t sample_type);
81 enum EST_sample_type_t nist_to_sample_type(
char *type);
84 bool &ascii, EST_EstFileType &t);
86 EST_read_status EST_TrackFile::load_esps(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
94 int num_points, num_fields, num_values;
96 EST_read_status r_val;
100 r_val = get_track_esps(filename, &fields, &tt, &fsize, &num_points,
101 &num_values, &fixed);
102 if (r_val == misc_read_error)
104 cerr <<
"Error reading ESPS file " << filename << endl;
105 return misc_read_error;
107 else if (r_val == wrong_format)
110 num_fields = num_values;
117 tr.
resize(num_points,num_fields);
120 for (i = 0; i < num_points; ++i)
122 for (j = 0; j < num_fields; ++j)
123 tr.
a(i, j) = tt[i][j+first_channel];
129 for (i = 0; i < num_fields; ++i)
134 tr.set_single_break(
false);
135 tr.set_equal_space(
true);
138 for (i = 0; i < num_values; ++i)
141 for (i = 0; i < num_values; ++i)
145 tr.set_file_type(tff_esps);
148 if (tr.channel_name(0) ==
"F0")
154 EST_read_status EST_TrackFile::load_ascii(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
161 int i, j, n_rows, n_cols=0;
165 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
167 cerr <<
"Can't open track file " << filename << endl;
168 return misc_read_error;
173 if (ishift < NEARLY_ZERO)
176 "Error: Frame spacing must be specified (or apparent frame shift nearly zero)\n";
177 return misc_read_error;
182 for (n_rows = 0; !ts.
eof(); ++n_rows)
188 for (n_cols = 0; !tt.
eof(); ++n_cols)
193 tr.
resize(n_rows, n_cols);
195 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
199 for (j = 0; !tt.
eof(); ++j)
200 tr.
a(i, j) = tt.
get().Float(ok);
203 cerr <<
"Wrong number of points in row " << i << endl;
204 cerr <<
"Expected " << n_cols <<
" got " << j << endl;
205 return misc_read_error;
210 tr.set_single_break(FALSE);
211 tr.set_equal_space(TRUE);
212 tr.set_file_type(tff_ascii);
218 EST_read_status EST_TrackFile::load_xgraph(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
226 int i, j, n_rows, n_cols;
230 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
232 cerr <<
"Can't open track file " << filename << endl;
233 return misc_read_error;
240 for (n_rows = 0; !ts.
eof(); ++n_rows)
244 for (n_cols = 0; !tt.
eof(); ++n_cols)
250 tr.
resize(n_rows, n_cols);
252 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
256 tr.
t(i) = tt.
get().Float(ok);
257 for (j = 0; !tt.
eof(); ++j)
258 tr.
a(i, j) = tt.
get().Float(ok);
261 cerr <<
"Wrong number of points in row " << i << endl;
262 cerr <<
"Expected " << n_cols <<
" got " << j << endl;
263 return misc_read_error;
267 tr.set_single_break(FALSE);
268 tr.set_equal_space(TRUE);
269 tr.set_file_type(tff_xgraph);
275 EST_read_status EST_TrackFile::load_xmg(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
286 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
288 cerr <<
"Can't open track file " << filename << endl;
289 return misc_read_error;
294 if (ts.
peek().string() !=
"XAO1")
299 while ((!ts.
eof()) && (ts.
peek().string() !=
"\014"))
301 k = ts.
get().string();
302 v = ts.
get().string();
307 else if (k ==
"YMin")
309 else if (k ==
"YMax")
316 cerr <<
"Unexpected end of file in reading xmg header\n";
317 return misc_read_error;
323 for (n = 0; !ts.
eof(); ++n)
329 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
333 if (ts.
peek().string() !=
"=")
335 tr.
t(i) = ts.
get().Float(ok) / 1000.0;
336 tr.
a(i) = ts.
get().Float(ok);
345 tr.set_single_break(TRUE);
346 tr.set_equal_space(FALSE);
347 tr.set_file_type(tff_xmg);
353 EST_read_status EST_TrackFile::load_est(
const EST_String filename,
354 EST_Track &tr,
float ishift,
float startt)
359 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
361 cerr <<
"Can't open track file " << filename << endl;
362 return misc_read_error;
367 r = load_est_ts(ts, tr, ishift, startt);
369 if ((r == format_ok) && (!ts.
eof()))
371 cerr <<
"Not end of file, but expected it\n";
372 return misc_read_error;
382 if (swap) swapfloat(&f);
387 EST_Track &tr,
float ishift,
float startt)
392 int num_frames, num_channels;
402 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
404 if (t != est_file_track)
405 return misc_read_error;
407 breaks = hinfo.
present(
"BreaksPresent") ? true :
false;
409 if ((hinfo.
present(
"EqualSpace")) &&
410 ((hinfo.
S(
"EqualSpace") ==
"true") ||
411 (hinfo.
S(
"EqualSpace") ==
"1")))
414 num_frames = hinfo.
I(
"NumFrames");
415 num_channels = hinfo.
I(
"NumChannels");
416 tr.
resize(num_frames, num_channels);
418 hinfo.
remove(
"NumFrames");
419 hinfo.
remove(
"EqualSpace");
420 hinfo.
remove(
"NumChannels");
421 hinfo.
remove(
"BreaksPresent");
429 for (p.
begin(hinfo); p;)
433 if (c->k.contains(
"Channel_"))
436 c->k.after(
"Channel_").Int());
447 if (!hinfo.
present(
"ByteOrder"))
449 else if (((hinfo.
S(
"ByteOrder") ==
"01") ? bo_little : bo_big)
455 const int BINARY_CHANNEL_BUFFER_SIZE=1024;
457 float frame_buffer[BINARY_CHANNEL_BUFFER_SIZE];
460 if( num_channels > BINARY_CHANNEL_BUFFER_SIZE )
461 frame =
new float[num_channels];
463 frame = frame_buffer;
467 for (i = 0; i < num_frames; ++i)
476 cerr <<
"unexpected end of file when looking for " << num_frames-i <<
" more frame(s)" << endl;
477 return misc_read_error;
479 tr.
t(i) = ts.
get().Float(ok);
481 return misc_read_error;
484 tr.
t(i) = get_float(ts,swap);
491 v = ts.
get().string();
499 if (get_float(ts,swap) == 0.0)
522 for (j = 0; j < num_channels; ++j){
523 tr.
a(i, j) = ts.
get().Float(ok);
525 return misc_read_error;
529 ts.
fread( frame,
sizeof(
float), num_channels );
531 for( j=0; j<num_channels; ++j ){
532 swapfloat( &frame[j] );
533 tr.
a(i,j) = frame[j];
536 for( j=0; j<num_channels; ++j )
537 tr.
a(i,j) = frame[j];
546 tr.aux(i, j) = ts.
get().string();
548 return misc_read_error;
552 cerr <<
"Warning: Aux Channel reading not yet implemented";
553 cerr <<
"for binary tracks\n";
559 if( frame != frame_buffer )
565 tr.set_single_break(FALSE);
566 tr.set_equal_space(eq_space);
569 tr.set_file_type(tff_est_ascii);
571 tr.set_file_type(tff_est_binary);
577 float ishift,
float startt)
586 if (ishift < NEARLY_ZERO)
589 "Error: Frame spacing must be specified (or apparent frame shift nearly zero)\n";
590 return misc_read_error;
593 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
595 cerr <<
"Can't open track file " << filename << endl;
596 return misc_read_error;
599 if (ts.
get().string() !=
"SNNS")
601 if (ts.
get().string() !=
"result")
607 int num_frames=0, num_channels=0;
614 if (t.
contains(
"teaching output included"))
621 if (k ==
"No. of output units")
622 num_channels = v.Int();
623 if (k ==
"No. of patterns")
624 num_frames = v.Int();
632 tr.
resize(num_frames, num_channels);
636 for (i = 0; (!ts.
eof()) && (i < num_frames);)
645 for (j = 0; j < num_channels; ++j)
652 for (j = 0; j < num_channels; ++j)
653 tr.
a(i, j) = ts.
get().Float(ok);
659 tr.set_single_break(FALSE);
660 tr.set_equal_space(TRUE);
661 tr.set_file_type(tff_snns);
673 int extra_channels=0;
679 cerr <<
"Output to stdout not available for ESPS file types:";
680 cerr <<
"no output written\n";
684 if ((include_time = (track_tosave.
equal_space() != TRUE)))
686 shift = EST_Track::default_frame_shift;
690 shift = track_tosave.
shift();
694 float **a =
new float*[track_tosave.
num_frames()];
698 a[i] =
new float[track_tosave.
num_channels() + extra_channels];
701 a[i][0] = track_tosave.
t(i);
704 a[i][j + extra_channels] = track_tosave.
a(i,j);
707 char **f_names =
new char*[track_tosave.
num_channels() + extra_channels];
711 f_names[i + extra_channels] = wstrdup(track_tosave.channel_name(i, esps_channel_names, 0));
715 f_names[0] = wstrdup(
"EST_TIME");
717 rc = put_track_esps(filename, f_names,
723 for (i=0; i < track_tosave.
num_frames(); i ++)
726 for (i=0; i < track_tosave.
num_channels()+extra_channels; i++)
727 delete [] f_names[i];
733 EST_write_status EST_TrackFile::save_est_ts(FILE *fp,
EST_Track tr)
737 fprintf(fp,
"EST_File Track\n");
738 fprintf(fp,
"DataType ascii\n");
739 fprintf(fp,
"NumFrames %d\n", tr.
num_frames());
744 fprintf(fp,
"BreaksPresent true\n");
746 fprintf(fp,
"Channel_%d %s\n", i, (
const char *)(tr.channel_name(i)));
749 fprintf(fp,
"Aux_Channel_%d %s\n", i,
750 (
const char *)(tr.aux_channel_name(i)));
754 for (p.
begin(tr); p; ++p)
755 fprintf(fp,
"%s %s\n", (
const char *)p->k,
756 (
const char *) p->v.String());
758 fprintf(fp,
"EST_Header_End\n");
762 fprintf(fp,
"%f\t", tr.
t(i));
763 fprintf(fp,
"%s\t", (
char *)(tr.
val(i) ?
"1 " :
"0 "));
767 fprintf(fp,
"%s ", (
const char *)tr.aux(i, j).
string());
773 EST_write_status EST_TrackFile::save_est_ascii(
const EST_String filename,
781 else if ((fd = fopen(filename,
"wb")) == NULL)
784 r = save_est_ts(fd,tr);
798 else if ((fd = fopen(filename,
"wb")) == NULL)
801 r = save_est_binary_ts(fd,tr);
809 EST_write_status EST_TrackFile::save_est_binary_ts(FILE *fp,
EST_Track tr)
816 fprintf(fp,
"EST_File Track\n");
817 fprintf(fp,
"DataType binary\n");
818 fprintf(fp,
"ByteOrder %s\n", ((EST_NATIVE_BO == bo_big) ?
"10" :
"01"));
819 fprintf(fp,
"NumFrames %d\n", tr.
num_frames());
823 fprintf(fp,
"BreaksPresent true\n");
824 fprintf(fp,
"CommentChar ;\n\n");
826 fprintf(fp,
"Channel_%d %s\n",i,tr.channel_name(i).
str());
827 fprintf(fp,
"EST_Header_End\n");
832 if((
int)fwrite(&tr.
t(i),4,1,fp) != 1)
833 return misc_write_error;
838 float bm = (tr.
val(i) ? 1 : 0);
839 if((
int)fwrite(&bm,4,1,fp) != 1)
840 return misc_write_error;
844 if((
int)fwrite(&tr.
a_no_check(i, j),4,1,fp) != 1)
845 return misc_write_error;
864 outf =
new ofstream(filename);
870 outf->setf(ios::fixed, ios::floatfield);
877 snprintf(fbuf,
sizeof(fbuf),
"%g",tr.
a(i, j));
878 *outf << fbuf <<
" ";
897 outf =
new ofstream(filename);
906 *outf <<
"\""<< tr.channel_name(j) <<
"\"\n";
909 *outf << tr.
t(i) <<
"\t" << tr.
a(i, j) << endl;
919 EST_write_status save_snns_pat(
const EST_String filename,
923 int num_inputs, num_outputs, num_pats, i;
929 outf =
new ofstream(filename);
935 for (pi = inpat.head(); pi ; pi = pi->next())
936 num_pats += inpat(pi).num_frames();
938 *outf <<
"SNNS pattern definition file V3.2\n";
940 time_t thetime = time(0);
941 char *date = ctime(&thetime);
946 num_inputs = inpat.
first().num_channels();
947 num_outputs = outpat.
first().num_channels();
949 *outf <<
"No. of patterns : " << num_pats << endl;
950 *outf <<
"No. of input units : "<< num_inputs << endl;
951 *outf <<
"No. of output units : "<< num_outputs << endl;
952 *outf << endl << endl;
954 for (pi = inpat.head(), po = outpat.head(); pi ;
955 pi = pi->next(), po = po->next())
957 if (inpat(pi).num_frames() != outpat(pi).num_frames())
959 cerr <<
"Error: Input pattern has " << inpat(pi).num_frames()
960 <<
" output pattern has " << outpat(pi).num_frames() << endl;
963 return misc_write_error;
965 for (i = 0; i < inpat(pi).num_frames(); ++i)
968 *outf <<
"#Input pattern " << (i + 1) <<
":\n";
969 for (j = 0; j < inpat(pi).num_channels(); ++j)
970 *outf << inpat(pi).a(i, j) <<
" ";
972 *outf <<
"#Output pattern " << (i + 1) <<
":\n";
973 for (j = 0; j < outpat(po).num_channels(); ++j)
974 *outf << outpat(po).a(i, j) <<
" ";
1044 if (filename ==
"-")
1047 outf =
new ofstream(filename);
1053 outf->setf(ios::fixed, ios::floatfield);
1063 *outf <<
"XAO1\n\n";
1064 *outf <<
"LineType segments \n";
1065 *outf <<
"LineStyle solid \n";
1066 *outf <<
"LineWidth 0 \n";
1067 *outf <<
"Freq " << sr / 1000 << endl;
1068 *outf <<
"Format Binary \n";
1074 *outf << char(12) <<
"\n";
1081 *outf << tr.
ms_t(i) <<
"\t";
1083 *outf <<tr.
a(i, j) <<
" ";
1094 static EST_write_status save_htk_as(
const EST_String filename,
1110 if (orig.f_String(
"contour_type",
"none") ==
"ct_lpc")
1111 type = track_to_htk_lpc(orig, track);
1121 s = rint((HTK_UNITS_PER_SECOND * EST_Track::default_frame_shift/1000.0)/10.0) * 10.0;
1123 file_num_channels += 1;
1128 s = rint((HTK_UNITS_PER_SECOND * track.
shift())/10.0) * 10.0;
1138 header.num_samps = (EST_BIG_ENDIAN ? track.
num_frames()
1142 header.samp_period = (EST_BIG_ENDIAN ? (long) s : SWAPINT((
long) s));
1143 if(use_type == HTK_DISCRETE)
1144 header.samp_size = (EST_BIG_ENDIAN ?
sizeof(short) :
1145 SWAPSHORT(
sizeof(
short)));
1147 header.samp_size = (EST_BIG_ENDIAN ? (
sizeof(float) * file_num_channels) :
1148 SWAPSHORT((sizeof(float) * file_num_channels)));
1150 header.samp_type = EST_BIG_ENDIAN ? type : SWAPSHORT(type);
1154 if (filename ==
"-")
1156 else if ((outf = fopen(filename,
"wb")) == NULL)
1158 cerr <<
"save_htk: cannot open file \"" << filename <<
1159 "\" for writing." << endl;
1160 return misc_write_error;
1164 fwrite((
char*)&(header.num_samps), 1,
sizeof(header.num_samps), outf);
1165 fwrite((
char*)&(header.samp_period), 1,
sizeof(header.samp_period), outf);
1166 fwrite((
char*)&(header.samp_size), 1,
sizeof(header.samp_size), outf);
1167 fwrite((
char*)&(header.samp_type), 1,
sizeof(header.samp_type), outf);
1170 if(use_type == HTK_DISCRETE)
1174 cerr <<
"No data to write as HTK_DISCRETE !" << endl;
1180 cerr <<
"Warning: multiple channel track being written" << endl;
1181 cerr <<
" as discrete will only save channel 0 !" << endl;
1185 short tempshort = (EST_BIG_ENDIAN ? (short)(track.
a(i, 0)) :
1186 SWAPSHORT((
short)(track.
a(i, 0)))) ;
1187 fwrite((
unsigned char*) &tempshort, 1,
sizeof(
short), outf);
1194 if ((type & HTK_EST_PS) != 0)
1197 swapfloat(&(track.
t(i)));
1198 fwrite((
unsigned char*) &(track.
t(i)), 1,
sizeof(
float), outf);
1203 swapfloat(&(track.
a(i,j)));
1204 fwrite((
unsigned char*) &(track.
a(i, j)), 1,
sizeof(
float), outf);
1216 return htk->num_samps > 0 &&
1217 htk->samp_period > 0 &&
1218 htk->samp_size > 0 &&
1219 htk->samp_size < (short)(UNREASONABLE_FRAME_SIZE *
sizeof(
float));
1222 static int htk_swapped_header(
htk_header *header)
1229 if (htk_sane_header(header))
1232 header->num_samps = SWAPINT(header->num_samps);
1233 header->samp_period = SWAPINT(header->samp_period);
1234 header->samp_size = SWAPSHORT(header->samp_size);
1235 header->samp_type = SWAPSHORT(header->samp_type);
1237 if (htk_sane_header(header))
1246 return save_htk_as(filename, tmp, HTK_FBANK);
1251 return save_htk_as(filename, tmp, HTK_FBANK);
1256 return save_htk_as(filename, tmp, HTK_MFCC);
1261 return save_htk_as(filename, tmp, HTK_MFCC | HTK_ENERGY);
1266 return save_htk_as(filename, tmp, HTK_USER);
1269 EST_write_status EST_TrackFile::save_htk_discrete(
const EST_String filename,
EST_Track tmp)
1271 return save_htk_as(filename, tmp, HTK_DISCRETE);
1275 static EST_read_status load_ema_internal(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt,
bool swap)
1280 int i, j, k, nframes, new_order;
1282 int sample_width, data_length;
1286 if ((fp = fopen(filename,
"rb")) == NULL)
1288 cerr <<
"EST_Track load: couldn't open EST_Track input file" << endl;
1289 return misc_read_error;
1292 fseek(fp, 0, SEEK_END);
1294 data_length = ftell(fp)/sample_width;
1296 nframes = data_length /new_order;
1299 cout <<
"d length: " << data_length <<
" nfr " << nframes << endl;
1301 tmp.
resize(nframes, new_order);
1303 tmp.set_equal_space(TRUE);
1305 file_data.
resize(data_length);
1307 fseek(fp, 0, SEEK_SET);
1309 if ((
int)fread(file_data.
memory(), sample_width, data_length, fp) != data_length)
1312 return misc_read_error;
1316 swap_bytes_short(file_data.
memory(), data_length);
1318 for (i = k = 0; i < nframes; ++i)
1319 for (j = 0; j < new_order; ++j, ++k)
1337 tmp.set_file_type(tff_ema);
1343 EST_read_status EST_TrackFile::load_ema(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1345 return load_ema_internal(filename, tmp, ishift, startt, FALSE);
1349 EST_read_status EST_TrackFile::load_ema_swapped(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1351 return load_ema_internal(filename, tmp, ishift, startt, TRUE);
1355 EST_read_status EST_TrackFile::load_NIST(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1360 char header[NIST_HDR_SIZE];
1361 int samps,sample_width,data_length,actual_bo;
1362 unsigned char *file_data;
1363 enum EST_sample_type_t actual_sample_type;
1364 char *byte_order, *sample_coding;
1370 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
1372 cerr <<
"Can't open track file " << filename << endl;
1373 return misc_read_error;
1376 current_pos = ts.
tell();
1377 if (ts.
fread(header,NIST_HDR_SIZE,1) != 1)
1378 return misc_read_error;
1380 if (strncmp(header,NIST_SIG,
sizeof(NIST_SIG)) != 0)
1381 return wrong_format;
1383 samps = nist_get_param_int(header,
"sample_count",-1);
1384 int num_channels = nist_get_param_int(header,
"channel_count",1);
1385 sample_width = nist_get_param_int(header,
"sample_n_bytes",2);
1387 nist_get_param_int(header,
"sample_rate",def_load_sample_rate);
1388 byte_order = nist_get_param_str(header,
"sample_byte_format",
1389 (EST_BIG_ENDIAN ?
"10" :
"01"));
1390 sample_coding = nist_get_param_str(header,
"sample_coding",
"pcm");
1392 data_length = (samps - offset)*num_channels;
1393 file_data = walloc(
unsigned char,sample_width * data_length);
1395 ts.
seek(current_pos+NIST_HDR_SIZE+(sample_width*offset*(num_channels)));
1397 n = ts.
fread(file_data,sample_width,data_length);
1399 if ((n < 1) && (n != data_length))
1402 wfree(sample_coding);
1404 return misc_read_error;
1406 else if ((n < data_length) && (data_length/num_channels == n))
1408 fprintf(stderr,
"TRACK read: nist header is (probably) non-standard\n");
1409 fprintf(stderr,
"TRACK read: assuming different num_channel interpretation\n");
1412 else if (n < data_length)
1414 fprintf(stderr,
"TRACK read: short file %s\n",
1416 fprintf(stderr,
"WAVE read: at %d got %d instead of %d samples\n",
1417 offset,n,data_length);
1421 actual_sample_type = nist_to_sample_type(sample_coding);
1422 actual_bo = ((strcmp(byte_order,
"10") == 0) ? bo_big : bo_little);
1425 data = convert_raw_data(file_data,data_length,
1426 actual_sample_type,actual_bo);
1429 int num_samples = data_length/num_channels;
1430 tmp.
resize(num_samples, num_channels);
1431 tmp.set_equal_space(TRUE);
1434 cerr <<
"shift " << 1/(float)sample_rate << endl;
1437 for (i=0; i<num_samples; i++)
1439 for (j = 0; j < num_channels; ++j)
1440 tmp.
a(i, j) = data[k++];
1443 for (j = 0; j < num_channels; ++j)
1463 if (filename ==
"-")
1465 else if ((fd = fopen(filename,
"wb")) == NULL)
1469 char header[NIST_HDR_SIZE], p[1024];;
1472 memset(header,0,1024);
1473 strcat(header, NIST_SIG);
1474 sprintf(p,
"channel_count -i %d\n", tr.
num_channels());
1476 sprintf(p,
"sample_count -i %d\n", tr.
num_frames());
1478 int sr = (int)(rint(1/(
float)tr.
shift()));
1479 sprintf(p,
"sample_rate -i %d\n", sr);
1481 t = sample_type_to_nist(st_short);
1482 sprintf(p,
"sample_coding -s%d %s\n", (
signed)strlen(t), t);
1485 strcat(header, NIST_END_SIG);
1487 strcat(header,
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
1490 if (fwrite(&header, 1024, 1, fd) != 1)
1491 return misc_write_error;
1503 int bo = str_to_bo(
"native");
1515 EST_read_status EST_TrackFile::load_htk(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1524 int i,j, order, new_frames, num_values, num_channels;
1532 int header_sz =
sizeof(header);
1535 float *compressA=NULL, compressA_Buffer[REASONABLE_FRAME_SIZE];
1536 float *compressB=NULL, compressB_Buffer[REASONABLE_FRAME_SIZE];
1537 bool fileIsCompressed=
false;
1539 unsigned short samp_type, base_samp_type;
1541 if ((fp = fopen(filename,
"rb")) == NULL){
1542 cerr <<
"EST_Track load: couldn't open EST_Track input file" << endl;
1543 return misc_read_error;
1547 if (fread(&header, header_sz, 1, fp) != 1){
1549 return wrong_format;
1552 swap = htk_swapped_header(&header);
1556 return read_format_error;
1559 samp_type = header.samp_type;
1560 base_samp_type = samp_type & HTK_MASK;
1562 time_included = (samp_type & HTK_EST_PS) != 0;
1564 switch(base_samp_type){
1566 cerr <<
"Can't read HTK WAVEFORM format file into track" << endl;
1567 return misc_read_error;
1576 EST_warning(
"reading HTK_IREFC and HTK_LPREC parameter types is unsupported" );
1578 return read_format_error;
1582 pname =
"ct_cepstrum";
1587 base_samp_type = HTK_LPCCEP;
1588 samp_type = HTK_LPCCEP | HTK_DELTA;
1589 pname =
"ct_cepstrum";
1602 cerr <<
"Can't read HTK DISCRETE format file into track" << endl;
1603 return misc_read_error;
1612 return wrong_format;
1619 if( header.samp_type & HTK_COMP ){
1621 fileIsCompressed =
true;
1623 num_channels = num_values = header.samp_size /
sizeof(
short int);
1626 if (num_channels > REASONABLE_FRAME_SIZE){
1627 compressA =
new float[num_values];
1628 compressB =
new float[num_values];
1631 compressA = compressA_Buffer;
1632 compressB = compressB_Buffer;
1635 if( (fread( compressA,
sizeof(
float), num_values, fp )) !=
static_cast<size_t>(num_values) ){
1637 return read_format_error;
1640 if( (fread( compressB,
sizeof(
float), num_values, fp )) !=
static_cast<size_t>(num_values) ){
1642 return read_format_error;
1646 swap_bytes_float( compressA, num_values );
1647 swap_bytes_float( compressB, num_values );
1652 new_frames = header.num_samps - (2*(
sizeof(float)-
sizeof(
short int)));
1655 num_channels = num_values = header.samp_size /
sizeof(float);
1656 new_frames = header.num_samps;
1659 if (num_values > UNREASONABLE_FRAME_SIZE){
1661 return read_format_error;
1667 float shift = ((float)header.samp_period/ (
float)HTK_UNITS_PER_SECOND);
1669 tmp.
resize(new_frames, num_channels);
1671 if ((startt > 0) && (startt < NEARLY_ZERO ))
1672 EST_warning(
"setting htk file start to %f", startt );
1676 tmp.set_equal_space(!time_included);
1679 long dataBeginPosition = ftell(fp);
1680 if( dataBeginPosition == -1 ){
1682 return wrong_format;
1685 if (fseek(fp,0,SEEK_END)){
1687 return wrong_format;
1691 if ((file_length = ftell(fp)) == -1){
1693 return wrong_format;
1697 if( fileIsCompressed ){
1698 expected_vals = (file_length-dataBeginPosition) /
sizeof(
short int);
1700 if( header.samp_type & HTK_CRC )
1704 expected_vals = (file_length-dataBeginPosition) /
sizeof(
float);
1711 if( expected_vals != (num_values * new_frames) ){
1714 return wrong_format;
1719 order = num_channels;
1720 if( samp_type & HTK_NO_E )
1723 if( samp_type & HTK_AC )
1725 else if( samp_type & HTK_DELTA )
1728 if( samp_type & HTK_ENERGY )
1732 if( fseek(fp, dataBeginPosition, SEEK_SET) == -1 ){
1733 cerr <<
"Couldn't position htk file at start of data" << endl;
1735 return misc_read_error;
1738 if( fileIsCompressed ){
1739 short int *frame, frame_buffer[REASONABLE_FRAME_SIZE];
1740 if( num_values > REASONABLE_FRAME_SIZE )
1741 frame =
new short int[num_values];
1743 frame = frame_buffer;
1745 int first_channel = time_included?1:0;
1747 for( i=0; i<new_frames; i++ ){
1748 if( fread( frame,
sizeof(
short int), num_values, fp ) != (
size_t) num_values ){
1749 cerr <<
"Could not read data from htk track file" << endl;
1752 if( frame != frame_buffer )
1754 if( compressA != compressA_Buffer )
1755 delete [] compressA;
1756 if( compressB != compressB_Buffer )
1757 delete [] compressB;
1759 return misc_read_error;
1763 swap_bytes_short( frame, num_values );
1766 tmp.
t(i) = ((float)frame[0]+compressB[0])/compressA[0];
1768 for( j=0; j<num_channels; ++j ){
1769 int index = j+first_channel;
1770 tmp.
a(i,j) = ((float)frame[index]+compressB[index])/compressA[index];
1776 if( frame != frame_buffer )
1778 if( compressA != compressA_Buffer )
1779 delete [] compressA;
1780 if( compressB != compressB_Buffer )
1781 delete [] compressB;
1784 float *frame, frame_buffer[REASONABLE_FRAME_SIZE];
1786 if (num_values > REASONABLE_FRAME_SIZE)
1787 frame =
new float[num_values];
1789 frame = frame_buffer;
1791 int first_channel = time_included?1:0;
1792 for( i=0; i<new_frames; i++ ){
1793 if( fread( frame,
sizeof(
float), num_values, fp ) != (
size_t) num_values ){
1794 cerr <<
"Could not read data from htk track file" << endl;
1796 if (frame != frame_buffer)
1798 return misc_read_error;
1801 swap_bytes_float( frame, num_values );
1804 tmp.
t(i) = frame[0];
1806 for( j=0; j<num_channels; ++j )
1807 tmp.
a(i, j) = frame[j+first_channel];
1812 if( frame != frame_buffer )
1820 for (i=0;i<order;i++)
1829 if ( (samp_type & HTK_ENERGY) && !(samp_type & HTK_NO_E) )
1833 if (samp_type & HTK_DELTA){
1834 for (j = 0; j < order; j++){
1840 if (samp_type & HTK_ENERGY)
1845 if (samp_type & HTK_AC){
1846 for(j=0;j<order;j++){
1847 t =
EST_String(
"ac")+ itoString(j+1)+
"_d_d";
1851 if (samp_type & HTK_ENERGY)
1856 if (i != num_channels){
1857 cerr <<
"Something went horribly wrong - wanted " << num_values
1858 <<
" channels in track but got " << i << endl;
1860 return wrong_format;
1862 tmp.f_set(
"contour_type",pname);
1864 tmp.set_file_type(tff_htk);
1881 { channel_voiced, 1 },
1882 { channel_power, 2},
1884 { channel_unknown, 0}
1897 f0_track.assign_map(ESPSF0TrackMap);
1906 f0_track.
a(i, channel_voiced) = track.
track_break(i) ? 0.1 : 1.2;
1907 f0_track.
a(i, channel_f0) = track.
track_break(i) ? 0.0: track.
a(i,0);
1910 f0_track.set_file_type(tff_esps);
1945 if (fz.channel_name(i) ==
"prob_voice")
1950 if (fz.channel_name(i) ==
"F0")
1958 if (fz.
a(i, f) < 1.0)
1965 if (fz.
a(i, p) < 0.5)
1981 int ncoefs, nchannels;
2002 for (
int c = 0; c < ncoefs; c++)
2004 lpc.
a(i, c) = track.
a(i, channel_lpc_0, c);
2005 lpc.
t(i) = track.
t(i);
2011 for(
int ii = 0; ii< track.
num_frames(); ii++)
2012 lpc.
a(ii, ncoefs) = track.
a(ii, channel_power);
2021 for (
EST_Litem *p = tlist.head(); p ; p = p->next())
2022 tlist(p).save(tlist(p).name(), otype);
2033 for (p = files.head(); p; p = p->next())
2037 if (read_track(tlist(plp), files(p), al) != format_ok)
2040 tlist(plp).set_name(files(p));
2053 startt = al.
fval(
"-startt" );
2056 ishift = al.
fval(
"ishift");
2058 ishift = al.
fval(
"-s");
2059 else if (al.
present(
"time_channel"))
2064 if (tr.
load(in_file, al.
val(
"-itype", 0), ishift, startt) != format_ok)
2069 if (tr.
load(in_file, ishift, startt ) != format_ok)
2088 EST_String EST_TrackFile::options_short(
void)
2092 for(
int n=0; n< EST_TrackFile::map.n() ; n++)
2094 const char *nm = EST_TrackFile::map.name(EST_TrackFile::map.token(n));
2105 EST_String EST_TrackFile::options_supported(
void)
2107 EST_String s(
"AvailablE track file formats:\n");
2109 for(
int n=0; n< EST_TrackFile::map.n() ; n++)
2111 const char *nm = EST_TrackFile::map.name(EST_TrackFile::map.token(n));
2112 const char *d = EST_TrackFile::map.info(EST_TrackFile::map.token(n)).description;
2122 { tff_none, {
"none" },
2124 "unknown track file type"}},
2125 {tff_esps, {
"esps" },
2126 {TRUE, EST_TrackFile::load_esps, EST_TrackFile::save_esps,
2127 "entropic sps file"}},
2128 {tff_est_ascii, {
"est",
"est_ascii" },
2129 {TRUE, EST_TrackFile::load_est, EST_TrackFile::save_est_ascii,
2130 "Edinburgh Speech Tools track file"}},
2131 {tff_est_binary, {
"est_binary" },
2132 {TRUE, EST_TrackFile::load_est, EST_TrackFile::save_est_binary,
2133 "Edinburgh Speech Tools track file"}}
2135 {tff_htk, {
"htk" },
2136 {TRUE, EST_TrackFile::load_htk, EST_TrackFile::save_htk,
2141 {tff_htk_fbank, {
"htk_fbank" },
2142 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_fbank,
2143 "htk file (as FBANK)"}},
2144 {tff_htk_mfcc, {
"htk_mfcc" },
2145 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_mfcc,
2146 "htk file (as MFCC)"}},
2147 {tff_htk_mfcc_e, {
"htk_mfcc_e" },
2148 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_mfcc_e,
2149 "htk file (as MFCC_E)"}},
2150 {tff_htk_user, {
"htk_user" },
2151 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_user,
2152 "htk file (as USER)"}},
2153 {tff_htk_discrete, {
"htk_discrete" },
2154 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_discrete,
2155 "htk file (as DISCRETE)"}},
2156 {tff_ssff, {
"ssff"},
2157 {TRUE, EST_TrackFile::load_ssff, EST_TrackFile::save_ssff,
2158 "Macquarie University's Simple Signal File Format"}},
2159 {tff_xmg, {
"xmg" },
2160 {TRUE, EST_TrackFile::load_xmg, EST_TrackFile::save_xmg,
2161 "xmg file viewer"}},
2162 {tff_xgraph, {
"xgraph" },
2163 {FALSE, EST_TrackFile::load_xgraph, EST_TrackFile::save_xgraph,
2164 "xgraph display program format"}},
2165 {tff_ema, {
"ema" },
2166 {FALSE, EST_TrackFile::load_ema, NULL,
2168 {tff_ema_swapped, {
"ema_swapped" },
2169 {FALSE, EST_TrackFile::load_ema_swapped, NULL,
2171 {tff_ascii, {
"ascii" },
2172 {TRUE, EST_TrackFile::load_ascii, EST_TrackFile::save_ascii,
2173 "ascii decimal numbers"}},
2174 { tff_none, {
"none"}, {FALSE, NULL, NULL,
"unknown track file type"} }
2180 EST_TrackFile::TS_Info> track_ts_names[] =
2182 { tff_none, {
"none" },
2184 "unknown track file type"}},
2186 {tff_est_ascii, {
"est"},
2187 {TRUE, EST_TrackFile::load_est_ts, EST_TrackFile::save_est_ts,
2188 "Edinburgh Speech Tools track file"}},
2190 {tff_est_binary, {
"est_binary"},
2191 {TRUE, EST_TrackFile::load_est_ts, EST_TrackFile::save_est_binary_ts,
2192 "Edinburgh Speech Tools track file"}},
2194 {tff_ssff, {
"ssff"},
2195 {TRUE, EST_TrackFile::load_ssff_ts, EST_TrackFile::save_ssff_ts,
2196 "Macquarie University's Simple Signal File Format"}},
2198 { tff_none, {
"none" },
2200 "unknown track file type"}}
2204 EST_TrackFile::ts_map(track_ts_names);
2207 #if defined(INSTANTIATE_TEMPLATES)
2209 #include "../base_class/EST_TNamedEnum.cc"
2212 const char *, EST_TrackFile::Info>;
2215 const char *, EST_TrackFile::TS_Info>;
void remove(const EST_String &name)
const EST_String S(const EST_String &path) const
int present(const EST_String &name) const
const int I(const EST_String &path) const
float fval(const EST_String &rkey, int m=1) const
const char * str(void) const
Get a const-pointer to the actual memory.
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
int contains(const char *s, int pos=-1) const
Does it contain this substring?
void begin(const Container &over)
Set the iterator ready to run over this container.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
const int present(const K &rkey) const
Returns true if key is present.
void append(const T &item)
add item onto end of list
const T & first() const
return const reference to first item in list
void resize(int n, int set=1)
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
EST_Token get_upto(const EST_String &s)
get up to `s` in stream as a single token.
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data, (don't use peek() immediately beforehand)
const EST_String filename() const
The originating filename (if there is one)
int open_string(const EST_String &newbuffer)
open an EST_TokenStream for a string rather than a file
EST_Token get_upto_eoln(void)
get up to the end of the line as a single token.
EST_Token & peek(void)
peek at next token
int tell(void) const
tell, synonym for filepos
int open(const EST_String &filename)
open an EST_TokenStream for a file.
EST_TokenStream & get(EST_Token &t)
get next token in stream
int seek(int position)
seek, reposition file pointer
int channel_position(const char *name, int offset=0) const
void set_channel_name(const EST_String &name, int channel)
set the name of the channel.
float & a_no_check(int i, int c=0)
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
int track_break(int i) const
return true if frame i is a break
int val(int i) const
return true if frame i is a value
float & a(int i, int c=0)
int num_aux_channels() const
return number of auxiliary channels in track
void set_name(const EST_String &n)
set name of track - redundant; use access to features instead
bool has_channel(const char *name) const
bool equal_space() const
return true if track has equal (i.e. fixed) frame spacing
void set_value(int i)
set frame i to be a value
float & t(int i=0)
return time position of frame i
void resize_aux(EST_StrList &map, bool preserve=1)
float ms_t(int i) const
return time of frame i in milli-seconds.
int num_channels() const
return number of channels in track
int num_frames() const
return number of frames in track
void change_type(float nshift, bool single_break)
REDO.
void resize(int num_frames, int num_channels, bool preserve=1)
EST_String name() const
name of track - redundant; use access to features instead
void fill_time(float t, int start=1)
void set_break(int i)
set frame i to be a break
bool single_break() const
const EST_String & string(void) const