Context Navigation

Back to NATIVES_SRC

NATIVES_SRC: media_decoder.h

File media_decoder.h, 15.1 KB (added by boyanl, 15 years ago)

Line
1	/*
2	* media_decoder.h
3	*
4	* Created on: Aug 18, 2009
5	* Author: nenko
6	*/
7
8	#ifndef MEDIA_MESSAGES_H_
9	#define MEDIA_MESSAGES_H_
10
11	extern "C" {
12	#include "libavformat/avformat.h"
13	#include "libswscale/swscale.h"
14	}
15
16	#include "../commons/util.h"
17	#include "../commons/java_messages.h"
18
19	//#undef TRACE
20	//#define TRACE do{} while(false)
21	//#undef DUMP
22	//#define DUMP(x) do{} while(false)
23	static const int SAMPLE_RATE = 44100;
24	static const int CHANNELS = 2;
25	static const int CHUNK_SAMPLES = SAMPLE_RATE / 100; /* 1/100s = 10ms */
26	static const int CHUNK_BYTES = CHUNK_SAMPLES * 4;
27	static const int BUFFER_SIZE = (AVCODEC_MAX_AUDIO_FRAME_SIZE * 6) / 2;
28	static const int CHUNK_MILLIS = 10;
29	static const int BUFFER_STEP = 1300; //ms
30	static const int BUFFER_EXTRA = 1200; //ms
31	static const int BUFFER_LEN = BUFFER_STEP + BUFFER_EXTRA; //ms
32	static const int ALLOWED_DIST_MILLIS = 2000;
33
34	class Decoder {
35	public:
36	Decoder() {
37	isOpen = false;
38	}
39
40	virtual ~Decoder() {
41	if (isOpen) {
42	if (videoStream > -1) {
43	av_free(buffer);
44	av_free(pFrameRGB);
45	av_free(pFrame);
46	avcodec_close(pVideoCodecCtx);
47	}
48	if (audioStream > -1) {
49	avcodec_close(pAudioCodecCtx);
50	}
51	av_close_input_file(pFormatContext);
52	isOpen = false;
53	}
54	}
55
56	int seek(int millis, int flags) {
57	assert(isOpen);
58	if (av_seek_frame(pFormatContext, -1, millis * 1000, flags) >= 0) {
59	return pFormatContext->data_offset;
60	}
61	fprintf(stderr, "Seeking ERROR!\tSeeking for : %d millisecond\n", millis);
62	return -1;
63
64	}
65
66	int seekStream(int millis, int stream, int flags) {
67	assert(isOpen);
68	int64_t rescaleMillis = rescaleToStream(millis, stream);
69	//if (avformat_seek_file(pFormatContext, stream, rescaleMillis - 10, rescaleMillis, rescaleMillis + 10, flags) >= 0) {
70	if (av_seek_frame(pFormatContext, stream, rescaleMillis, flags) >= 0) {
71	return pFormatContext->data_offset;
72	}
73	fprintf(stderr, "Seeking Stream ERROR!\tSeeking for : %d millisecond\n", millis);
74	return -1;
75
76	}
77
78	int rescaleToStream(int millis, int stream) {
79	int num = pFormatContext->streams[stream]->time_base.num;
80	int den = pFormatContext->streams[stream]->time_base.den;
81	return av_rescale(millis * 1000, den, AV_TIME_BASE * (int64_t) num);
82	}
83
84	double rescaleToSeconds(int timeStamp, int stream) {
85	int num = pFormatContext->streams[stream]->time_base.num;
86	int den = pFormatContext->streams[stream]->time_base.den;
87	int64_t rescaledTime = av_rescale(timeStamp, AV_TIME_BASE
88	* (int64_t) num, den);
89	return double(rescaledTime) / AV_TIME_BASE;
90	}
91
92	vector<byte> lastAudioBuffer;
93	int lastAudioStep;
94	bool shifted;
95	vector<byte> lastVideoFrame;
96	int lastVideoMillis;
97	int lastVideoDur;
98
99	vector<byte> const& getAudioBuffer(int step) {
100	//step is in STEP_SIZE
101	assert(isOpen);
102	if (lastAudioStep != step) {
103	AVPacket packet;
104	lastAudioBuffer.clear();
105	int millis = step * BUFFER_STEP;
106	//seekStream(millis, audioStream, AVSEEK_FLAG_ANY); //AVSEEK_FLAG_BACKWARD
107	seek(millis, AVSEEK_FLAG_ANY \| AVSEEK_FLAG_BACKWARD);
108	shifted = true;
109	int currentTime = rescaleToStream(millis, audioStream);
110	int targetTime = rescaleToStream(millis + BUFFER_LEN, audioStream);
111	bool first = true;
112	while (currentTime <= targetTime && av_read_frame(pFormatContext,
113	&packet) >= 0) {
114
115	//DUMP(packet.size);
116
117	if (first && packet.stream_index == audioStream) {
118	double firstSeconds = rescaleToSeconds(packet.pts,
119	audioStream);
120
121	double delta = firstSeconds - millis / 1000.0;
122	if (delta < 0) {
123	DUMP(millis);
124	DUMP(firstSeconds * 1000);
125	av_free_packet(&packet);
126	continue;
127	}
128
129	int padding = int(delta * 1000 / CHUNK_MILLIS * CHUNK_SAMPLES + 0.5) * 4;
130	if (padding > 0) {
131	DUMP(padding);
132	DUMP(lastAudioBuffer.size());
133	}
134
135	lastAudioBuffer.insert(lastAudioBuffer.begin(), padding, 0);
136	first = false;
137	}
138	if (packet.stream_index == audioStream) {
139
140	try {
141	vector<byte> packetSamples = decodeAudio(packet);
142	lastAudioBuffer.insert(lastAudioBuffer.end(),
143	packetSamples.begin(), packetSamples.end());
144	currentTime = packet.pts + packet.duration;
145
146	//currentTime += packet.duration;
147	packetSamples.clear();
148	} catch(runtime_error const& ex) {
149	cerr << ex.what() << endl;
150	}
151	}
152	if (packet.data) {
153	av_free_packet(&packet);
154	}
155	}
156	//double audioEndSeconds = rescaleToSeconds(currentTime, audioStream);
157	//assert(audioEndSeconds * 1000 > millis + BUFFER_LEN);
158	//DUMP(millis / 1000.0);
159	//DUMP(audioEndSeconds);
160	//DUMP(double(lastAudioBuffer.size()) / CHUNK_BYTES / 100);
161
162	lastAudioStep = step;
163	}
164
165	return lastAudioBuffer;
166	}
167
168	vector<byte> getAudio(int millis) {
169	assert(isOpen);
170	assert(millis >= 0);
171	assert(millis % CHUNK_MILLIS == 0);
172
173	int stepNum;
174	if (millis < BUFFER_STEP) {
175	stepNum = 0;
176	} else if (millis % BUFFER_STEP < BUFFER_EXTRA / 2) {
177	stepNum = (millis - BUFFER_EXTRA) / BUFFER_STEP;
178	} else {
179	stepNum = millis / BUFFER_STEP;
180	}
181	vector<byte> const& v = getAudioBuffer(stepNum);
182	int index = (millis - stepNum * BUFFER_STEP) / CHUNK_MILLIS;
183	// DUMP(millis);
184	// DUMP(pAudioCodecCtx->channels);
185	// DUMP(pAudioCodecCtx->sample_fmt);
186	// DUMP(stepNum);
187	// DUMP(index);
188	// DUMP(v.size());
189	// DUMP(index * CHUNK_BYTES);
190	// DUMP((index + 1) * CHUNK_BYTES);
191	// DUMP((index + 1) * CHUNK_BYTES - index * CHUNK_BYTES);
192	// assert(index * CHUNK_BYTES < (int)v.size());
193	// int end = ((index + 1) * CHUNK_BYTES <= (int)v.size())? (index + 1) * CHUNK_BYTES : (int)v.size();
194	// assert((index + 1) * CHUNK_BYTES <= (int)v.size());
195	return vector<byte> (v.begin() + index * CHUNK_BYTES, v.begin()
196	+ (index + 1) * CHUNK_BYTES);
197	}
198
199	void initVideo() {
200	pVideoCodecCtx = pFormatContext->streams[videoStream]->codec;
201	pFrame = avcodec_alloc_frame();
202	pFrameRGB = avcodec_alloc_frame();
203
204	pixFMT = PIX_FMT_RGB32;
205	int numBytes = avpicture_get_size(pixFMT, pVideoCodecCtx->width,
206	pVideoCodecCtx->height);
207	buffer = (uint8_t) av_malloc(numBytes sizeof(uint8_t));
208
209	pVideoCodec = avcodec_find_decoder(pVideoCodecCtx->codec_id);
210	if (pVideoCodec == NULL) {
211	throw runtime_error("Decoder::sendFrame - Unsupported format!");
212	}
213
214	if (avcodec_open(pVideoCodecCtx, pVideoCodec) < 0) {
215	throw runtime_error("Decoder::initVideo - Cannot open codec!");
216	}
217
218	int w = pVideoCodecCtx->width;
219	int h = pVideoCodecCtx->height;
220
221	img_convert_ctx = sws_getContext(w, h, pVideoCodecCtx->pix_fmt, w,
222	h, pixFMT, SWS_BICUBIC, NULL, NULL, NULL);
223
224	if (img_convert_ctx == NULL) {
225	throw runtime_error(
226	"Decoder::decodeFrame - Cannot initialize the conversion context!");
227	}
228
229	}
230	void initAudio() {
231	pAudioCodecCtx = pFormatContext->streams[audioStream]->codec;
232	pAudioCodec = avcodec_find_decoder(pAudioCodecCtx->codec_id);
233	if (!pAudioCodec) {
234	throw runtime_error("Decoder::initAudio - Unsupported format!");
235	}
236	avcodec_open(pAudioCodecCtx, pAudioCodec);
237	lastAudioStep = -1;
238	}
239	int getLinesize() const {
240	return pFrame->linesize[0];
241	}
242	int decodeFrame(vector<byte>& data) {
243
244	//if (img_convert_ctx == NULL) {
245
246	sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
247	pVideoCodecCtx->height, pFrameRGB->data,
248	pFrameRGB->linesize);
249	//}
250	return saveFrame(data, pFrameRGB, pVideoCodecCtx->width,
251	pVideoCodecCtx->height);
252
253	}
254	int getFrame(vector<byte>& data, int millis) {
255	assert(isOpen == true);
256	avpicture_fill((AVPicture*) pFrameRGB, buffer, pixFMT,
257	pVideoCodecCtx->width, pVideoCodecCtx->height);
258	int frameFinished, packetCnt;
259	AVPacket packet;
260
261	int64_t pktTime;
262	AVRational pktTimeBase;
263	int pktMillis;
264	if (shifted \|\| !(lastVideoMillis <= millis && millis <= lastVideoMillis + ALLOWED_DIST_MILLIS)) {
265	seekStream(millis, videoStream, AVSEEK_FLAG_BACKWARD);
266	shifted = false;
267	}
268	if (millis >= lastVideoMillis && millis <= lastVideoMillis + lastVideoDur){
269	data = lastVideoFrame;
270	DUMP(lastVideoFrame.size());
271	return lastVideoFrame.size();
272	}
273	packetCnt = 0;
274	while (av_read_frame(pFormatContext, &packet) >= 0) {
275	if (packet.stream_index == videoStream) {
276	packetCnt++;
277	pktTimeBase = pFormatContext->streams[videoStream]->time_base;
278	pktTime = av_rescale(packet.dts, AV_TIME_BASE * (int64_t) pktTimeBase.num, pktTimeBase.den);
279	pktMillis = pktTime * 1000.0 / AV_TIME_BASE + 0.5;
280
281	int64_t pktDur = av_rescale(packet.duration, AV_TIME_BASE * (int64_t) pktTimeBase.num, pktTimeBase.den);
282	pktDur = pktDur * 1000.0 / AV_TIME_BASE + 0.5;
283	int ret = avcodec_decode_video2(pVideoCodecCtx, pFrame, &frameFinished, &packet);
284	if (ret < 0) {
285	throw runtime_error("err_corrupted_frame: Frame can't be decoded");
286	}
287	//assert(ret >= 0);
288	//if (pktMillis + pktDur < millis){
289	// av_free_packet(&packet);
290	// continue;
291	// }
292	if (pktMillis >= millis) {
293	if (frameFinished) {
294	//DUMP(packetCnt);
295	int res = decodeFrame(data);
296	lastVideoMillis = millis;
297	lastVideoDur = pktDur;
298	lastVideoFrame = data;
299	return res;
300	}
301	}
302	}
303	av_free_packet(&packet);
304	}
305	throw runtime_error("err_corrupted_frame: Frame can't be decoded");
306	}
307
308	vector<byte> decodeAudio(AVPacket& packet) {
309	ReSampleContext *resmpCtx = av_audio_resample_init(CHANNELS,
310	pAudioCodecCtx->channels, SAMPLE_RATE,
311	pAudioCodecCtx->sample_rate, SAMPLE_FMT_S16,
312	pAudioCodecCtx->sample_fmt, 0, 5, 1, 0.8);
313
314	vector<byte> audioBuf(BUFFER_SIZE, 0);
315	int audioSize = 0;
316	int len;
317	uint8_t* oldPacketData = packet.data;
318	int oldPacketSize = packet.size;
319
320	do {
321	int tempAudioSize = BUFFER_SIZE;
322	len = avcodec_decode_audio3(pAudioCodecCtx,
323	(int16_t*) (&audioBuf[0] + audioSize), &tempAudioSize,
324	&packet);
325	if (len <= 0) {
326	throw runtime_error("err_damaged_packet: length <= 0");
327	}
328	audioSize += tempAudioSize;
329	if (audioBuf.size() - audioSize < BUFFER_SIZE / 2) {
330	audioBuf.resize(audioBuf.size() + BUFFER_SIZE);
331	}
332	packet.size -= len;
333	packet.data += len;
334
335	} while (packet.size > 0);
336	packet.data = oldPacketData;
337	packet.size = oldPacketSize;
338
339	audioBuf.resize(audioSize);
340	vector<byte> resampleBuf(BUFFER_SIZE, 0);
341	int resampleSize = BUFFER_SIZE;
342	int coef = (pAudioCodecCtx->sample_fmt == 0)? 1 : (pAudioCodecCtx->sample_fmt == 1)? 2 : 4; //SAMPLE_FMT_U8 == 0, SAMPLE_FMT_S16 == 1, SAMPLE_FMT_S32 == 2
343	resampleSize = audio_resample(resmpCtx, (short*) &resampleBuf[0],
344	(short*) &audioBuf[0], audioSize / coef); //audioSize / 2
345	// the stupid resample function is returning the number of source samples for mono...
346	// NOTE this may not work nice with other than 1 or 2 channels
347	//pAudioCodecCtx->
348	int resampleMul = pAudioCodecCtx->channels == 1 ? 4 : 2;
349	//DUMP(resampleSize*resampleMul);
350	resampleBuf.resize(resampleSize * resampleMul);
351	//I really have no idea why resampleBuf is 2 times smaller for mono input
352
353
354	audio_resample_close(resmpCtx);
355	audioBuf.clear();
356	return resampleBuf;
357	//return audioBuf;
358	}
359	bool isOpen;
360	int fileSize;
361
362	AVFormatContext *pFormatContext;
363
364	AVCodecContext *pVideoCodecCtx;
365	int videoStream;
366	uint8_t *buffer;
367	AVFrame pFrame, pFrameRGB;
368	AVCodec *pVideoCodec;
369	PixelFormat pixFMT;
370	struct SwsContext* img_convert_ctx;
371
372	AVCodecContext *pAudioCodecCtx;
373	AVCodec *pAudioCodec;
374	int audioStream;
375
376	int saveFrame(vector<byte>& data, AVFrame *pFrame, int width, int height) {
377	int linesize = pFrame->linesize[0];
378	const int size = linesize * height;
379	data.resize (size);
380
381	/*for (int i = 0; i < size; i++)
382	data.push_back(pFrame->data[0][i]);*/
383
384	memcpy (&data[0], pFrame->data[0], size);
385	return size;
386	}
387
388	};
389
// Ids of the commands understood by the media bridge. The values are part of
// the wire protocol, so each one is spelled out explicitly.
enum MediaCommandIds {
	GET_FRAME_COMMAND_ID = 10,
	GET_INFO_COMMAND_ID = 11,
	OPEN_COMMAND_ID = 12,
	CLOSE_COMMAND_ID = 13,
	GET_AUDIO_COMMAND_ID = 14,
	SEND_BYTES_COMMAND = 15,
	SEEK_COMMAND = 16,
	READ_FRAME_COMMAND = 17,
	DECODE_FRAME_COMMAND = 18
};
401
// Ids written as the first int of every response built by MediaResponse.
// The values are part of the wire protocol, so each one is spelled out.
enum MediaResponseIds {
	REQUEST_BYTES_RESPONSE = 10,
	FRAME_RESPONSE = 11,
	VIDEO_INFO_RESPONSE = 12,
	SENT_BYTES_RESPONSE = 13,
	AUDIO_RESPONSE = 14
};
409
410	class GetFrameCommand: public Command {
411	public:
412	GetFrameCommand(vector<byte> const& data, int& pos) :
413	Command(data, pos) {
414	timeMillis = readInt(data, pos);
415	assert(pos == (int) data.size());
416	assert(getId() == GET_FRAME_COMMAND_ID);
417	}
418
419	virtual Response execute();
420
421	private:
422	int timeMillis;
423	};
424
425	class GetAudioCommand: public Command {
426	public:
427	GetAudioCommand(vector<byte> const& data, int& pos) :
428	Command(data, pos) {
429	timeMillis = readInt(data, pos);
430	assert(pos == (int) data.size());
431	assert(getId() == GET_AUDIO_COMMAND_ID);
432	}
433
434	virtual Response execute();
435
436	private:
437	int timeMillis;
438	};
439
440	class GetInfoCommand: public Command {
441	public:
442	GetInfoCommand(vector<byte> const& data, int& pos) :
443	Command(data, pos) {
444	assert(pos == (int) data.size());
445	assert(getId() == GET_INFO_COMMAND_ID);
446	}
447
448	virtual Response execute();
449	};
450
451	class OpenCommand: public Command {
452
453	public:
454	OpenCommand(vector<byte> const& data, int& pos) :
455	Command(data, pos) {
456	fileSize = readInt(data, pos);
457	assert(pos == (int) data.size());
458	assert(getId() == OPEN_COMMAND_ID);
459	}
460	virtual Response execute();
461	private:
462	int fileSize;
463	};
464
465	struct SendBytesCommand: public Command {
466	int filePos;
467	vector<byte> fileData;
468
469	SendBytesCommand(vector<byte> const& data, int& pos) :
470	Command(data, pos) {
471	filePos = readInt(data, pos);
472	readBinData(data, pos, fileData);
473	assert(pos == (int) data.size());
474	assert(getId() == SEND_BYTES_COMMAND);
475	}
476	virtual Response execute() {
477	//do nothing - special handling
478	throw runtime_error("SendBytesCommand::execute() Should not be called!");
479	}
480	};
481
482	class MediaBridge: public JavaBridge {
483	public:
484
485	virtual Command* createCommand(int id, vector<byte> const& data, int& pos) {
486	switch (id) {
487	case GET_INFO_COMMAND_ID:
488	return new GetInfoCommand(data, pos);
489	case OPEN_COMMAND_ID:
490	return new OpenCommand(data, pos);
491	case GET_FRAME_COMMAND_ID:
492	return new GetFrameCommand(data, pos);
493	case GET_AUDIO_COMMAND_ID:
494	return new GetAudioCommand(data, pos);
495	case SEND_BYTES_COMMAND:
496	return new SendBytesCommand(data, pos);
497	default:
498	return JavaBridge::createCommand(id, data, pos);
499	}
500	}
501	};
502
503	struct MediaResponse {
504	static Response createRequestBytes(int pos, int count) {
505	vector<byte> data;
506	Message::writeInt(data, REQUEST_BYTES_RESPONSE);
507	Message::writeInt(data, pos);
508	Message::writeInt(data, count);
509	return Response(data);
510	}
511	static Response createFrame(vector<byte> pixels, int width, int height) {
512	vector<byte> data;
513	Message::writeInt(data, FRAME_RESPONSE);
514	Message::writeInt(data, width);
515	Message::writeInt(data, height);
516	Message::writeBinData(data, pixels, false);
517	return Response(data);
518	}
519	static Response createAudio(vector<byte> chunk, int pos) {
520	vector<byte> data;
521	assert((int) chunk.size() == CHUNK_BYTES);
522	Message::writeInt(data, AUDIO_RESPONSE);
523	Message::writeInt(data, pos);
524	Message::writeBinData(data, chunk);
525	return Response(data);
526	}
527	static Response createVideoInfo(int width, int height, int millis, AVRational timeBase, bool hasAudio) {
528	vector<byte> data;
529	Message::writeInt(data, VIDEO_INFO_RESPONSE);
530	Message::writeInt(data, millis);
531	Message::writeInt(data, width);
532	Message::writeInt(data, height);
533	Message::writeInt(data, (int) hasAudio);
534	Message::writeInt(data, timeBase.num);
535	Message::writeInt(data, timeBase.den);
536	return Response(data);
537	}
538	};
539
540	#endif /* MEDIA_MESSAGES_H_ */
541

Download in other formats:

Original Format