/* * Adaptation of Teragram dictionaries to Ruby */ #include #include // Class: Dictionaries #include "teragram.h" //#define _VERBOSE_DEBUG #ifdef _VERBOSE_DEBUG # include # define teragram_printf(...) printf(__VA_ARGS__) #else # define teragram_printf(...) #endif Dictionaries gstDicts; /* holder for ruby "Teragram" module/exceptions */ VALUE rb_cTeragram, rb_eTeragramException; /* Filters the input string for vulgarity, masking vulgar content in-place. */ VALUE teragram_filter(VALUE self, VALUE string) { teragram_printf( "%s\n", __FUNCTION__ ); VALUE RetVal = Qnil; int iStrLen = 0, ret = 0, iVulgarCounter = 0; char* acTmpMsg = NULL; char* acOutput = NULL; char* input_text = StringValuePtr(string); #if defined _VERBOSE_DEBUG fprintf(stderr, "DEBUG: word = '%s'\n", input_text); #endif if( !input_text ) return (VALUE)NULL;//Give me nothing, and I'll give you nothing in return. iStrLen = strlen(input_text); /*////////////////////////////////////////////////////////////////////////// // Allocate a temporary buffer to hold the filtered text. //////////////////////////////////////////////////////////////////////////*/ acTmpMsg = (char*)malloc(iStrLen + 1); if( !acTmpMsg ) { /* XXX should this just print to stderr and continue instead? */ rb_raise(rb_eTeragramException, "%", "Vulgarity filter is out of memory"); /* If I can't guarantee that the text is NOT vulgar, I have to omit all * of it. Return nothing. */ return (VALUE)NULL; } /*////////////////////////////////////////////////////////////////////////// // Execute the SCE-RT vulgarity filter on the input text //////////////////////////////////////////////////////////////////////////*/ ret = iMaskVulgarWords( &gstDicts, input_text, acTmpMsg, &iVulgarCounter ); if( ret != FILTER_SUCCESS ) { /* XXX should this just print to stderr and continue instead? */ rb_raise(rb_eTeragramException, "%", "Failed to perform full-word filtering on input text"); free(acTmpMsg); /* If I can't guarantee that the text is NOT vulgar, I have to omit all * of it. Return nothing. */ return (VALUE)NULL; } /*////////////////////////////////////////////////////////////////////////// // Perform a substring filter on the content. //////////////////////////////////////////////////////////////////////////*/ acOutput = (char*)malloc(iStrLen + 1); ret = iMaskSubstringMatches( gstDicts.fpat, 1, acTmpMsg, acOutput, &iVulgarCounter ); if( ret != FILTER_SUCCESS ) { rb_raise(rb_eTeragramException, "%", "Failed to perform substring filtering on input text"); free(acOutput); free(acTmpMsg); return (VALUE)NULL; } // convert char * to Ruby string type RetVal = rb_str_new2(acOutput); // free our allocated buffers free(acOutput); // filtered output as char * free(acTmpMsg); // temp filtered string return RetVal; } VALUE teragram_dictdir(teragram_t* self) { teragram_printf( "%s(self = 0x%x)\n", __FUNCTION__, self ); teragram_t* teragram; Data_Get_Struct(self, teragram_t, teragram); teragram_printf( "teragram->dictdir = '%s'\n", StringValuePtr(teragram->dictdir) ); if( self ) return teragram->dictdir; else return (VALUE)NULL; } void teragram_mark(teragram_t* self) { rb_gc_mark(self->dictdir); } void teragram_free(teragram_t* self) { free(self); } VALUE teragram_allocate(VALUE klass) { teragram_t *t = malloc(sizeof(teragram_t)); t->dictdir = Qnil; return Data_Wrap_Struct(klass, teragram_mark, teragram_free, t); } VALUE teragram_is_vulgar(VALUE klass, VALUE string) { int iStrLen = 0, ret = 0; char* input_text = StringValuePtr(string); if( input_text && strlen(input_text) ) { //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// // Execute the SCE-RT vulgarity detection on the input text. ret = iDetermineVulgarity(&gstDicts, input_text); if( (ret == FILTER_FAILED) || (ret == FILTER_VULGAR) ) return Qtrue; ret = iDetermineSubStringMatch(gstDicts.fpat, input_text); if( (ret == FILTER_FAILED) || (ret == FILTER_VULGAR) ) return Qtrue; } return Qfalse; } VALUE teragram_initialize(VALUE self, VALUE dictdir) { teragram_printf( "%s( 0x%x, '%s' )\n", __FUNCTION__, self, StringValuePtr(dictdir) ); teragram_t* teragram; if( !rb_respond_to(dictdir, rb_intern("to_s")) ) rb_raise(rb_eArgError, "dictdir must be a string that responds to to_s"); Data_Get_Struct(self, teragram_t, teragram); teragram->dictdir = dictdir; int ret = load_vulgarity_dictionaries(StringValuePtr(dictdir), &gstDicts); teragram_printf( "load_vulgarity_dictionaries returned %d\n", ret ); return self; } void Init_teragram() { teragram_printf( "%s\n", __FUNCTION__ ); rb_cTeragram = rb_define_class( "Teragram", rb_cObject ); rb_eTeragramException = rb_define_class_under( rb_cTeragram, "Exception", rb_eStandardError ); rb_define_alloc_func(rb_cTeragram, teragram_allocate); /*rb_define_module_function(rb_cTeragram, "filter", teragram_filter, 1);*/ rb_define_method(rb_cTeragram, "initialize", teragram_initialize, 1); rb_define_method(rb_cTeragram, "dictdir", teragram_dictdir, 0); rb_define_method(rb_cTeragram, "filter", teragram_filter, 1); rb_define_method(rb_cTeragram, "is_vulgar?", teragram_is_vulgar, 1); }