Project

General

Profile

Feature #6612 » zlib.inflate_deflate_chunked.2.patch

drbrain (Eric Hodel), 07/04/2012 05:38 AM

View differences:

ext/zlib/zlib.c (working copy)
544 544
#define ZSTREAM_FLAG_IN_STREAM  0x2
545 545
#define ZSTREAM_FLAG_FINISHED   0x4
546 546
#define ZSTREAM_FLAG_CLOSING    0x8
547
#define ZSTREAM_FLAG_UNUSED     0x10
547
#define ZSTREAM_FLAG_GZFILE     0x10 /* disallows yield from expand_buffer for
548
                                        gzip */
549
#define ZSTREAM_FLAG_UNUSED     0x20
548 550

  
549 551
#define ZSTREAM_READY(z)       ((z)->flags |= ZSTREAM_FLAG_READY)
550 552
#define ZSTREAM_IS_READY(z)    ((z)->flags & ZSTREAM_FLAG_READY)
551 553
#define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED)
552 554
#define ZSTREAM_IS_CLOSING(z)  ((z)->flags & ZSTREAM_FLAG_CLOSING)
555
#define ZSTREAM_IS_GZFILE(z)   ((z)->flags & ZSTREAM_FLAG_GZFILE)
556

  
557
#define ZSTREAM_EXPAND_BUFFER_OK          0
558
#define ZSTREAM_EXPAND_BUFFER_MEM_ERROR (-1)
553 559

  
554 560
/* I think that more better value should be found,
555 561
   but I gave up finding it. B) */
......
568 574

  
569 575
struct zstream_run_args {
570 576
    struct zstream * z;
571
    int flush;
572
    int interrupt;
577
    int flush;      /* stream flush value for inflate() or deflate() */
578
    int interrupt;  /* stop processing the stream and return to ruby */
579
    int jump_state; /* for buffer expansion block break or exception */
573 580
};
574 581

  
575 582
static voidpf
......
614 621
static void
615 622
zstream_expand_buffer(struct zstream *z)
616 623
{
617
    long inc;
618

  
619 624
    if (NIL_P(z->buf)) {
620
	    /* I uses rb_str_new here not rb_str_buf_new because
621
	       rb_str_buf_new makes a zero-length string. */
622
	z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE);
623
	z->buf_filled = 0;
624
	z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf);
625
	z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE;
626
	RBASIC(z->buf)->klass = 0;
625
	zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE);
627 626
	return;
628 627
    }
629 628

  
630
    if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
631
	/* to keep other threads from freezing */
632
	z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
629
    if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) {
630
	if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
631
	    int state = 0;
632
	    VALUE self = (VALUE)z->stream.opaque;
633

  
634
	    rb_str_resize(z->buf, z->buf_filled);
635
	    RBASIC(z->buf)->klass = rb_cString;
636
	    OBJ_INFECT(z->buf, self);
637

  
638
	    rb_protect(rb_yield, z->buf, &state);
639

  
640
	    z->buf = Qnil;
641
	    zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX);
642

  
643
	    if (state)
644
		rb_jump_tag(state);
645

  
646
	    return;
647
	}
648
	else {
649
	    zstream_expand_buffer_into(z,
650
		    ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled);
651
	}
633 652
    }
634 653
    else {
635
	inc = z->buf_filled / 2;
636
	if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
637
	    inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
654
	if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
655
	    z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
638 656
	}
639
	rb_str_resize(z->buf, z->buf_filled + inc);
640
	z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
641
	    (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
657
	else {
658
	    long inc = z->buf_filled / 2;
659
	    if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
660
		inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
661
	    }
662
	    rb_str_resize(z->buf, z->buf_filled + inc);
663
	    z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
664
		(int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
665
	}
666
	z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
642 667
    }
643
    z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
644 668
}
645 669

  
646 670
static void
......
663 687
}
664 688

  
665 689
static int
690
zstream_expand_buffer_protect(struct zstream *z)
691
{
692
    int state = 0;
693

  
694
    rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state);
695

  
696
    return state;
697
}
698

  
699
static int
666 700
zstream_expand_buffer_without_gvl(struct zstream *z)
667 701
{
668 702
    char * new_str;
669 703
    long inc, len;
670 704

  
671
    if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
705
    if (rb_block_given_p()) {
706
	return rb_thread_call_with_gvl(zstream_expand_buffer_protect, (void *)z);
707
    }
708
    else if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
672 709
	z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX;
673 710
    }
674 711
    else {
......
682 719
	new_str = realloc(RSTRING(z->buf)->as.heap.ptr, len + 1);
683 720

  
684 721
	if (!new_str)
685
	    return 0;
722
	    return ZSTREAM_EXPAND_BUFFER_MEM_ERROR;
686 723

  
687 724
	/* from rb_str_resize */
688 725
	RSTRING(z->buf)->as.heap.ptr = new_str;
......
695 732
    }
696 733
    z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled;
697 734

  
698
    return 1;
735
    return ZSTREAM_EXPAND_BUFFER_OK;
699 736
}
700 737

  
701 738
static void
......
862 899
static VALUE
863 900
zstream_detach_input(struct zstream *z)
864 901
{
865
    VALUE dst;
902
    VALUE dst, self = (VALUE)z->stream.opaque;
866 903

  
867 904
    if (NIL_P(z->input)) {
868 905
	dst = rb_str_new(0, 0);
......
920 957
zstream_run_func(void *ptr)
921 958
{
922 959
    struct zstream_run_args *args = (struct zstream_run_args *)ptr;
923
    int err, flush = args->flush;
960
    int err = Z_OK, flush = args->flush;
924 961
    struct zstream *z = args->z;
925 962
    uInt n;
926 963

  
......
943 980
	    break;
944 981
	}
945 982

  
946
	if (!zstream_expand_buffer_without_gvl(z)) {
983
	state = zstream_expand_buffer_without_gvl(z);
984

  
985
	if (state == ZSTREAM_EXPAND_BUFFER_MEM_ERROR) {
947 986
	    err = Z_MEM_ERROR; /* realloc failed */
948 987
	    break;
949 988
	}
989
	else if (state) {
990
	    err = Z_OK; /* buffer expanded but stream processing was stopped */
991
	    args->jump_state = state;
992
	    break;
993
	}
950 994
    }
951 995

  
952 996
    return (VALUE)err;
......
973 1017
    args.z = z;
974 1018
    args.flush = flush;
975 1019
    args.interrupt = 0;
1020
    args.jump_state = 0;
976 1021

  
977 1022
    if (NIL_P(z->input) && len == 0) {
978 1023
	z->stream.next_in = (Bytef*)"";
......
1024 1069
	zstream_append_input(z, z->stream.next_in, z->stream.avail_in);
1025 1070
        guard = Qnil; /* prevent tail call to make guard effective */
1026 1071
    }
1072

  
1073
    if (args.jump_state)
1074
	rb_jump_tag(args.jump_state);
1027 1075
}
1028 1076

  
1029 1077
static VALUE
......
1206 1254
}
1207 1255

  
1208 1256
/*
1209
 * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and
1210
 * Zlib::Inflate#finish for details of this behavior.
1257
 * call-seq:
1258
 *   finish                 -> String
1259
 *   finish { |chunk| ... } -> nil
1260
 *
1261
 * Finishes the stream and flushes output buffer.  If a block is given each
1262
 * chunk is yielded to the block until the input buffer has been flushed to
1263
 * the output buffer.
1211 1264
 */
1212 1265
static VALUE
1213 1266
rb_zstream_finish(VALUE obj)
......
1220 1273
}
1221 1274

  
1222 1275
/*
1223
 * Flushes input buffer and returns all data in that buffer.
1276
 * call-seq:
1277
 *   flush_next_out                 -> String
1278
 *   flush_next_out { |chunk| ... } -> nil
1279
 *
1280
 * Flushes output buffer and returns all data in that buffer.  If a block is
1281
 * given each chunk is yielded to the block until the current output buffer
1282
 * has been flushed.
1224 1283
 */
1225 1284
static VALUE
1226 1285
rb_zstream_flush_next_in(VALUE obj)
......
1502 1561
/*
1503 1562
 * Document-method: Zlib::Deflate.deflate
1504 1563
 *
1505
 * call-seq: Zlib.deflate(string[, level])
1506
 *           Zlib::Deflate.deflate(string[, level])
1564
 * call-seq:
1565
 *   Zlib.deflate(string[, level])
1566
 *   Zlib::Deflate.deflate(string[, level])
1507 1567
 *
1508 1568
 * Compresses the given +string+. Valid values of level are
1509
 * <tt>NO_COMPRESSION</tt>, <tt>BEST_SPEED</tt>,
1510
 * <tt>BEST_COMPRESSION</tt>, <tt>DEFAULT_COMPRESSION</tt>, and an
1511
 * integer from 0 to 9 (the default is 6).
1569
 * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, Zlib::BEST_COMPRESSION,
1570
 * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6).
1512 1571
 *
1513 1572
 * This method is almost equivalent to the following code:
1514 1573
 *
......
1562 1621
}
1563 1622

  
1564 1623
/*
1565
 * Document-method: Zlib#deflate
1624
 * Document-method: Zlib::Deflate#deflate
1566 1625
 *
1567 1626
 * call-seq:
1568
 *   deflate(string, flush = Zlib::NO_FLUSH)
1627
 *   z.deflate(string, flush = Zlib::NO_FLUSH)                 -> String
1628
 *   z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil
1569 1629
 *
1570 1630
 * Inputs +string+ into the deflate stream and returns the output from the
1571 1631
 * stream.  On calling this method, both the input and the output buffers of
1572
 * the stream are flushed.
1632
 * the stream are flushed.  If +string+ is nil, this method finishes the
1633
 * stream, just like Zlib::ZStream#finish.
1573 1634
 *
1574
 * If +string+ is nil, this method finishes the stream, just like
1575
 * Zlib::ZStream#finish.
1635
 * If a block is given consecutive deflated chunks from the +string+ are
1636
 * yielded to the block and +nil+ is returned.
1576 1637
 *
1577 1638
 * The +flush+ parameter specifies the flush mode.  The following constants
1578 1639
 * may be used:
......
1619 1680
 * Document-method: Zlib::Deflate#flush
1620 1681
 *
1621 1682
 * call-seq:
1622
 *   flush(flush = Zlib::SYNC_FLUSH)
1683
 *   flush(flush = Zlib::SYNC_FLUSH)                 -> String
1684
 *   flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil
1623 1685
 *
1624 1686
 * This method is equivalent to <tt>deflate('', flush)</tt>. This method is
1625
 * just provided to improve the readability of your Ruby program.
1687
 * just provided to improve the readability of your Ruby program.  If a block
1688
 * is given chunks of deflate output are yielded to the block until the buffer
1689
 * is flushed.
1626 1690
 *
1627 1691
 * See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH,
1628 1692
 * SYNC_FLUSH, FULL_FLUSH and FINISH.
......
1810 1874
}
1811 1875

  
1812 1876
/*
1813
 * Document-method: Zlib::Inflate.inflate
1877
 * Document-method: Zlib::inflate
1814 1878
 *
1815
 * call-seq: Zlib::Inflate.inflate(string)
1879
 * call-seq:
1880
 *   Zlib.inflate(string)
1881
 *   Zlib::Inflate.inflate(string)
1816 1882
 *
1817 1883
 * Decompresses +string+. Raises a Zlib::NeedDict exception if a preset
1818 1884
 * dictionary is needed for decompression.
......
1888 1954
/*
1889 1955
 * Document-method: Zlib::Inflate#inflate
1890 1956
 *
1891
 * call-seq: inflate(string)
1957
 * call-seq:
1958
 *   inflate(deflate_string)                 -> String
1959
 *   inflate(deflate_string) { |chunk| ... } -> nil
1960
 *
1961
 * Inputs +deflate_string+ into the inflate stream and returns the output from
1962
 * the stream.  On calling this method, both the input and the output buffers of
1963
 * the stream are flushed.  If +deflate_string+ is +nil+, this method finishes the
1964
 * stream, just like Zlib::ZStream#finish.
1892 1965
 *
1893
 * Inputs +string+ into the inflate stream and returns the output from the
1894
 * stream.  Calling this method, both the input and the output buffer of the
1895
 * stream are flushed.  If string is +nil+, this method finishes the stream,
1896
 * just like Zlib::ZStream#finish.
1966
 * If a block is given consecutive inflated chunks from the +deflate_string+
1967
 * are yielded to the block and +nil+ is returned.
1897 1968
 *
1898 1969
 * Raises a Zlib::NeedDict exception if a preset dictionary is needed to
1899 1970
 * decompress.  Set the dictionary by Zlib::Inflate#set_dictionary and then
......
2167 2238

  
2168 2239
    obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz);
2169 2240
    zstream_init(&gz->z, funcs);
2241
    gz->z.flags |= ZSTREAM_FLAG_GZFILE;
2170 2242
    gz->io = Qnil;
2171 2243
    gz->level = 0;
2172 2244
    gz->mtime = 0;
test/zlib/test_zlib.rb (working copy)
39 39
      assert_raise(Zlib::StreamError) { Zlib::Deflate.deflate("foo", 10000) }
40 40
    end
41 41

  
42
    def test_deflate_chunked
43
      original = ''
44
      chunks = []
45
      r = Random.new 0
46

  
47
      z = Zlib::Deflate.new
48

  
49
      2.times do
50
        input = r.bytes(16384)
51
        original << input
52
        z.deflate(input) do |chunk|
53
          chunks << chunk
54
        end
55
      end
56

  
57
      assert_equal [2, 16384, 10],
58
                   chunks.map { |chunk| chunk.length }
59

  
60
      final = z.finish
61

  
62
      assert_equal 16388, final.length
63

  
64
      all = chunks.join
65
      all << final
66

  
67
      inflated = Zlib.inflate all
68

  
69
      assert_equal original, inflated
70
    end
71

  
72
    def test_deflate_chunked_break
73
      chunks = []
74
      r = Random.new 0
75

  
76
      z = Zlib::Deflate.new
77

  
78
      input = r.bytes(16384)
79
      z.deflate(input) do |chunk|
80
        chunks << chunk
81
        break
82
      end
83

  
84
      assert_equal [2], chunks.map { |chunk| chunk.length }
85

  
86
      final = z.finish
87

  
88
      assert_equal 16393, final.length
89

  
90
      all = chunks.join
91
      all << final
92

  
93
      original = Zlib.inflate all
94

  
95
      assert_equal input, original
96
    end
97

  
42 98
    def test_addstr
43 99
      z = Zlib::Deflate.new
44 100
      z << "foo"
......
202 258
      assert_equal "foofoofoo", out
203 259
    end
204 260

  
261
    def test_finish_chunked
262
      # zeros = Zlib::Deflate.deflate("0" * 100_000)
263
      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
264
              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
265
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
266
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
267
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
268
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
269
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
270
              "\000\000\000\000\000\000\000\257\006\351\247BH"
271

  
272
      chunks = []
273

  
274
      z = Zlib::Inflate.new
275

  
276
      z.inflate(zeros) do |chunk|
277
        chunks << chunk
278
        break
279
      end
280

  
281
      p :finish
282

  
283
      z.finish do |chunk|
284
        chunks << chunk
285
      end
286

  
287
      assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
288
                   chunks.map { |chunk| chunk.size }
289

  
290
      assert chunks.all? { |chunk|
291
        chunk =~ /\A0+\z/
292
      }
293
    end
294

  
205 295
    def test_inflate
206 296
      s = Zlib::Deflate.deflate("foo")
207 297
      z = Zlib::Inflate.new
......
212 302
      z << "foo" # ???
213 303
    end
214 304

  
305
    def test_inflate_chunked
306
      # s = Zlib::Deflate.deflate("0" * 100_000)
307
      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
308
              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
309
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
310
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
311
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
312
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
313
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
314
              "\000\000\000\000\000\000\000\257\006\351\247BH"
315

  
316
      chunks = []
317

  
318
      z = Zlib::Inflate.new
319

  
320
      z.inflate(zeros) do |chunk|
321
        chunks << chunk
322
      end
323

  
324
      assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
325
                   chunks.map { |chunk| chunk.size }
326

  
327
      assert chunks.all? { |chunk|
328
        chunk =~ /\A0+\z/
329
      }
330
    end
331

  
332
    def test_inflate_chunked_break
333
      # zeros = Zlib::Deflate.deflate("0" * 100_000)
334
      zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
335
              "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
336
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
337
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
338
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
339
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
340
              "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
341
              "\000\000\000\000\000\000\000\257\006\351\247BH"
342

  
343
      chunks = []
344

  
345
      z = Zlib::Inflate.new
346

  
347
      z.inflate(zeros) do |chunk|
348
        chunks << chunk
349
        break
350
      end
351

  
352
      out = z.inflate nil
353

  
354
      assert_equal 100_000 - chunks.first.length, out.length
355
    end
356

  
215 357
    def test_inflate_dictionary
216 358
      dictionary = "foo"
217 359