<?xml version="1.0" encoding="ISO-8859-1"?>

<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns="http://purl.org/rss/1.0/"
 xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
 xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
 xmlns:admin="http://webns.net/mvcb/"
>

<channel rdf:about="http://epm.sagepub.com">
<title>Educational and Psychological Measurement recent issues</title>
<link>http://epm.sagepub.com</link>
<description>Educational and Psychological Measurement RSS feed -- recent issues</description>
<prism:publicationName>Educational and Psychological Measurement</prism:publicationName>
<prism:issn>0013-1644</prism:issn>
<items>
 <rdf:Seq>
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/537?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/554?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/571?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/587?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/603?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/621?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/643?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/659?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/676?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/4/695?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/357?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/379?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/397?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/413?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/431?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/443?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/464?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/488?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/3/515?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/181?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/197?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/215?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/233?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/245?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/262?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/281?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/304?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/325?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/2/335?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/5?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/25?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/42?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/58?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/78?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/97?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/120?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/129?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/139?rss=1" />
  <rdf:li rdf:resource="http://epm.sagepub.com/cgi/content/abstract/68/1/154?rss=1" />
 </rdf:Seq>
</items>
<image rdf:resource="http://epm.sagepub.com:80/icons/banner/title.gif" />
</channel>

<image rdf:about="http://epm.sagepub.com:80/icons/banner/title.gif">
<title>Educational and Psychological Measurement</title>
<url>http://epm.sagepub.com:80/icons/banner/title.gif</url>
<link>http://epm.sagepub.com</link>
</image>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/537?rss=1">
<title><![CDATA[Examining the Relationship Between Race-Based Differential Item Functioning and Item Difficulty]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/537?rss=1</link>
<description><![CDATA[<p>Recent research examining racial differences on standardized cognitive tests has focused on the impact of test item difficulty. Studies using data from the SAT and GRE have reported a correlation between item difficulty and differential item functioning (DIF) such that minority test takers are less likely than majority test takers to respond correctly to easy test items. The statistical techniques used and the effect sizes reported in these studies have been heavily criticized. This study addresses these criticisms by examining the relationship between item difficulty and DIF by using alternative statistical techniques based on item response theory and a different standardized test. The results replicate previous research and provide support for the generalizability of the findings.</p>]]></description>
<dc:creator><![CDATA[Scherbaum, C. A., Goldstein, H. W.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310129</dc:identifier>
<dc:title><![CDATA[Examining the Relationship Between Race-Based Differential Item Functioning and Item Difficulty]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>553</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>537</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/554?rss=1">
<title><![CDATA[Computer-Based and Paper-and-Pencil Administration Mode Effects on a Statewide End-of-Course English Test]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/554?rss=1</link>
<description><![CDATA[<p>The current study compared student performance between paper-and-pencil testing (PPT) and computer-based testing (CBT) on a large-scale statewide end-of-course English examination. Analyses were conducted at both the item and test levels. The overall results suggest that scores obtained from PPT and CBT were comparable. However, at the content domain level, a rather large difference in the reading comprehension section suggests that reading comprehension test may be more affected by the test administration mode. Results from the confirmatory factor analysis suggest that the administration mode did not alter the construct of the test.</p>]]></description>
<dc:creator><![CDATA[Kim, D.-H., Huynh, H.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310132</dc:identifier>
<dc:title><![CDATA[Computer-Based and Paper-and-Pencil Administration Mode Effects on a Statewide End-of-Course English Test]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>570</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>554</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/571?rss=1">
<title><![CDATA[IRT-LR-DIF With Estimation of the Focal-Group Density as an Empirical Histogram]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/571?rss=1</link>
<description><![CDATA[<p>Item response theory&ndash;likelihood ratio&ndash;differential item functioning (IRT-LR-DIF) is used to evaluate the degree to which items on a test or questionnaire have different measurement properties for one group of people versus another, irrespective of group-mean differences on the construct. Usually, the latent distribution is presumed normal for both groups, but previous research shows that results are biased if the true distribution is not approximately normal. This article introduces a variation of IRT-LR-DIF, called empirical histogram&ndash;differential item functioning (EH-DIF), in which the focal-group latent density is estimated simultaneously with the item parameters as an empirical histogram (EH). A simulation study shows that if the focal-group density is nonnormal, Type I error rates and focal-group estimates of the item parameters, mean, and SD are more accurate using EH-DIF than standard IRT-LR-DIF methods that presume normality. A pseudoempirical example is analyzed to illustrate EH-DIF.</p>]]></description>
<dc:creator><![CDATA[Woods, C. M.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310133</dc:identifier>
<dc:title><![CDATA[IRT-LR-DIF With Estimation of the Focal-Group Density as an Empirical Histogram]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>586</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>571</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/587?rss=1">
<title><![CDATA[Comparison of Multiple-Indicators, Multiple-Causes- and Item Response Theory-Based Analyses of Subgroup Differences]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/587?rss=1</link>
<description><![CDATA[<p>This research provides a direct comparison of effect size estimates based on structural equation modeling (SEM), item response theory (IRT), and raw scores. Differences between the SEM, IRT, and raw score approaches are examined under a variety of data conditions (IRT models underlying the data, test lengths, magnitude of group differences, and relative size of reference and focal groups). Results show that all studied methods perform similarly. All methods tend to underestimate effects as effect sizes become larger. SEM-based approaches to effect size estimation perform somewhat better at shorter test lengths, whereas approaches based on IRT and raw score perform somewhat better at longer test lengths. Although these differences between methods are detectable, they are small in magnitude.</p>]]></description>
<dc:creator><![CDATA[Willse, J. T., Goodman, J. T.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407312601</dc:identifier>
<dc:title><![CDATA[Comparison of Multiple-Indicators, Multiple-Causes- and Item Response Theory-Based Analyses of Subgroup Differences]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>602</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>587</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/603?rss=1">
<title><![CDATA[A Generalizability Theory Approach to Standard Error Estimates for Bookmark Standard Settings]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/603?rss=1</link>
<description><![CDATA[<p>The bookmark standard-setting procedure is an item response theory&ndash;based method that is widely implemented in state testing programs. This study estimates standard errors for cut scores resulting from bookmark standard settings under a generalizability theory model and investigates the effects of different universes of generalization and error sources on standard errors. This study produced several notable results. First, different patterns of variance component estimates are found for different cut scores; therefore, researchers should estimate separate variance components for each cut score and use them to estimate corresponding standard errors. Second, different universes of generalization produce different standard error estimates; thus, policy makers should consider which universe is appropriate for the proposed use of cut scores. Third, participants and groups have nonnegligible effects on several error sources. To decrease the standard errors for cut scores, increasing the number of small groups seems more efficient than increasing the number of participants.</p>]]></description>
<dc:creator><![CDATA[Lee, G., Lewis, D. M.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407312603</dc:identifier>
<dc:title><![CDATA[A Generalizability Theory Approach to Standard Error Estimates for Bookmark Standard Settings]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>620</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>603</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/621?rss=1">
<title><![CDATA[Latent Mean and Covariance Differences With Measurement Equivalence in College Students With Developmental Difficulties Versus the Wechsler Adult Intelligence Scale-III/Wechsler Memory Scale-III Normative Sample]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/621?rss=1</link>
<description><![CDATA[<p>Intelligence tests are usually part of the assessment battery for the diagnosis of adults with learning disabilities (LD) and attention deficit hyperactivity disorder (ADHD). Professionals must ensure that inferences drawn from such test scores are equivalent across populations with and without disabilities. Examination of measurement equivalence provides a direct test of the hypothesis that the same set of latent variables underlies a set of test scores in different groups and metric relationships between observed scores and the corresponding latent variables are the same. The hypothesis of measurement equivalence was examined in two samples of college students: one sample with LD and one sample with ADHD. Scores on the third editions of the Wechsler Adult Intelligence and Memory Scales were compared with an age-matched subset of the conorming sample. Results supported the assumption of measurement equivalence but revealed marked differences across samples in latent variable variances and covariances and latent variable means.</p>]]></description>
<dc:creator><![CDATA[Bowden, S. C., Gregg, N., Bandalos, D., Davis, M., Coleman, C., Holdnack, J. A., Weiss, L. G.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310126</dc:identifier>
<dc:title><![CDATA[Latent Mean and Covariance Differences With Measurement Equivalence in College Students With Developmental Difficulties Versus the Wechsler Adult Intelligence Scale-III/Wechsler Memory Scale-III Normative Sample]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>642</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>621</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/643?rss=1">
<title><![CDATA[A Psychometric Evaluation of Two Achievement Goal Inventories]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/643?rss=1</link>
<description><![CDATA[<p>The properties of the achievement goal inventories developed by Grant and Dweck (2003) and Elliot and McGregor (2001) were evaluated in two studies with a total of 780 participants. A four-factor specification for the Grant and Dweck inventory did not closely replicate results published in their original report. In contrast, the structure of the Elliot and McGregor inventory was more or less replicable. In addition, the associations between the scales on both inventories were examined, and both inventories have scales that tap approach-related forms of performance and learning goals. However, a notable advantage of the shorter Elliot and McGregor inventory is that it also includes measures of the avoidance forms of these goals. In sum, the author concluded that the Elliot and McGregor inventory was a superior measure of achievement goal constructs on the basis of practical, psychometric, and theoretical considerations.</p>]]></description>
<dc:creator><![CDATA[Donnellan, M. B.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310125</dc:identifier>
<dc:title><![CDATA[A Psychometric Evaluation of Two Achievement Goal Inventories]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>658</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>643</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/659?rss=1">
<title><![CDATA[Parent Ratings Using the Chinese Version of the Parent Gifted Rating Scales-School Form Reliability and Validity for Chinese Students]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/659?rss=1</link>
<description><![CDATA[<p>This study examined the reliability and validity of the scores of a Chinese-translated version of the Gifted Rating Scales&ndash;School Form (GRS-S) using parents as raters and explored the effects of gender and grade on the ratings. A total of 222 parents participated in the study and rated their child independently using the Chinese version of the Parent GRS-S (CVPGRS-S). Results indicate high reliability for parent rating scores and statistically significant correlations between CVPGRS-S scale scores and students' classroom academic achievement scores. The effect sizes of the relationships were moderate. Results revealed no statistically significant grade effect on any of the six CVPGRS-S scales; small but statistically significant differences in favor of females on the six CVPGRS-S scales were found. However, the effect size was small. Research results suggest that the Chinese-translated parent version of the GRS-S holds promise for use with Chinese parents.</p>]]></description>
<dc:creator><![CDATA[Li, H., Lee, D., Pfeiffer, S. I., Petscher, Y.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407313365</dc:identifier>
<dc:title><![CDATA[Parent Ratings Using the Chinese Version of the Parent Gifted Rating Scales-School Form Reliability and Validity for Chinese Students]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>675</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>659</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/676?rss=1">
<title><![CDATA[Internalizing and Externalizing Behavior Problem Scores: Cross-Ethnic and Longitudinal Measurement Invariance of the Behavior Problem Index]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/676?rss=1</link>
<description><![CDATA[<p>Accurate measurement of behavioral functioning is a cornerstone of research on disparities in child development. This study used the National Longitudinal Survey of Youth 1979 (NLSY79) data to test measurement invariance of the Behavior Problem Index (BPI) during middle childhood across three ethnic groups. Using the internalizing and externalizing behavior problem division derived by Parcel and Menaghan (1988) and suggested for use with NLSY79 data, the configural invariance hypothesis was not supported. The BPI factor structure model was revised based on theoretical considerations using the division of items from the Child Behavior Checklist. This model demonstrated configural invariance across ethnic groups and over time. Moreover, measurement invariance of factor loadings and thresholds across ethnic groups at each time point and within each ethnic group over time was also supported. The implications of these findings for educational and cross-cultural research are outlined.</p>]]></description>
<dc:creator><![CDATA[Guttmannova, K., Szanyi, J. M., Cali, P. W.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310127</dc:identifier>
<dc:title><![CDATA[Internalizing and Externalizing Behavior Problem Scores: Cross-Ethnic and Longitudinal Measurement Invariance of the Behavior Problem Index]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>694</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>676</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/4/695?rss=1">
<title><![CDATA[Dimensionality Assessment Using the Full-Information Item Bifactor Analysis for Graded Response Data: An Illustration With the State Metacognitive Inventory]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/4/695?rss=1</link>
<description><![CDATA[<p>Dimensionality assessment using the full-information item bifactor model for graded response data is provided. The model applies to data in which each item relates to a general factor and one group factor. Specifically, alternative model specification within item response theory (IRT) is shown to test a scale's factor structure. For illustrative purposes, the bifactor model and competing IRT models were fit to the data of separate cohorts of incoming college students (Cohort 1, <I>n</I> = 1,490; Cohort 2, <I>n</I> = 1,533) to test the dimensionality of an adapted version of the State Metacognitive Inventory. Overall, the bifactor analysis did not strongly support distinct group factors after accounting for the general factor. Instead, results suggested conceptualizing the scale as unidimensional, indicating that scores should be based on the total scale, not subscales. Considerations related to the use of the bifactor IRT model are discussed.</p>]]></description>
<dc:creator><![CDATA[Immekus, J. C., Imbrie, P. K.]]></dc:creator>
<dc:date>2008-07-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407313366</dc:identifier>
<dc:title><![CDATA[Dimensionality Assessment Using the Full-Information Item Bifactor Analysis for Graded Response Data: An Illustration With the State Metacognitive Inventory]]></dc:title>
<prism:number>4</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>709</prism:endingPage>
<prism:publicationDate>2008-08-01</prism:publicationDate>
<prism:startingPage>695</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/357?rss=1">
<title><![CDATA[Testing Latent Mean Differences Between Observed and Unobserved Groups Using Multilevel Factor Mixture Models]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/357?rss=1</link>
<description><![CDATA[<p>When assessing latent mean differences, researchers frequently do not explore possible heterogeneity within their data sets. Sources of differences may be functions of a nested data structure or heterogeneity in the form of unobserved classes of observations defined by a difference in factor means. In this study, the use of multilevel structural equation models in combination with factor mixture models (FMMs) for assessing latent mean differences is discussed. Interpretation of single- and multilevel model parameter estimates when comparing latent means for observed and unobserved groups is demonstrated using a large-scale data set in which students are clustered within schools. Methodological dilemmas are discussed, and directions for future research with respect to multilevel FMMs are suggested.</p>]]></description>
<dc:creator><![CDATA[Allua, S., Stapleton, L. M., Beretvas, S. N.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407312600</dc:identifier>
<dc:title><![CDATA[Testing Latent Mean Differences Between Observed and Unobserved Groups Using Multilevel Factor Mixture Models]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>378</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>357</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/379?rss=1">
<title><![CDATA[Polytomous Differential Item Functioning and Violations of Ordering of the Expected Latent Trait by the Raw Score]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/379?rss=1</link>
<description><![CDATA[<p>The graded response (GR) and generalized partial credit (GPC) models do not imply that examinees ordered by raw observed score will necessarily be ordered on the expected value of the latent trait (OEL). Factors were manipulated to assess whether increased violations of OEL also produced increased Type I error rates in differential item functioning (DIF) procedures conditioned on the raw score. Shorter tests and greater variance in item slope parameters increased OEL violations for the GR data but not for the GPC data. These same factors, combined with group mean differences between the reference and focal groups, increased the Type I error rate for the observed raw score DIF methods for both the GR and GPC data. A procedure conditioned on the classical test theory latent score estimate instead of the observed score helped reduce the Type I error in some of the conditions but not for the shortest tests.</p>]]></description>
<dc:creator><![CDATA[DeMars, C. E.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308511</dc:identifier>
<dc:title><![CDATA[Polytomous Differential Item Functioning and Violations of Ordering of the Expected Latent Trait by the Raw Score]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>396</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>379</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/397?rss=1">
<title><![CDATA[Sources of Validity Evidence for Educational and Psychological Tests]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/397?rss=1</link>
<description><![CDATA[<p>This study investigates aspects of validity reflected in a large and diverse sample of published measures used in educational and psychological testing contexts. The current edition of Mental Measurements Yearbook served as the data source for this study. The validity aspects investigated included perspective on validity represented, number and kinds of sources of validity evidence provided, overall evaluation of the favorability of the test, and whether these factors varied as a function of the type of test. Findings reveal that validity information is not routinely provided in terms of modern validity theory, some sources of validity evidence (e.g., consequential) are essentially ignored in validity reports, and the favorability of judgments about a test is more strongly related to the number of validity sources provided than to the perspective on validity taken or other factors. The article concludes with implications for extending and refining current validity theory and validation practice.</p>]]></description>
<dc:creator><![CDATA[Cizek, G. J., Rosenberg, S. L., Koons, H. H.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310130</dc:identifier>
<dc:title><![CDATA[Sources of Validity Evidence for Educational and Psychological Tests]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>412</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>397</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/413?rss=1">
<title><![CDATA[Bayesian Multidimensional IRT Models With a Hierarchical Structure]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/413?rss=1</link>
<description><![CDATA[<p>As item response models gain increased popularity in large-scale educational and measurement testing situations, many studies have been conducted on the development and applications of unidimensional and multidimensional models. Recently, attention has been paid to IRT-based models with an overall ability dimension underlying several ability dimensions specific for individual test items, where the focus is mainly on models with dichotomous latent traits. The purpose of this study is to propose such models with continuous latent traits under the Bayesian framework. The proposed models are further compared with the conventional IRT models using Bayesian model choice techniques. The results from simulation studies as well as actual data suggest that (a) such models can be developed; (b) compared with the unidimensional IRT model, the proposed models better describe the actual data; and (c) the use of the proposed IRT models and the multiunidimensional model should be based on different beliefs about the underlying dimensional structure of a test.</p>]]></description>
<dc:creator><![CDATA[Sheng, Y., Wikle, C. K.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308512</dc:identifier>
<dc:title><![CDATA[Bayesian Multidimensional IRT Models With a Hierarchical Structure]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>430</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>413</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/431?rss=1">
<title><![CDATA[Sample Sizes When Using Multiple Linear Regression for Prediction]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/431?rss=1</link>
<description><![CDATA[<p>When using multiple regression for prediction purposes, the issue of minimum required sample size often needs to be addressed. Using a Monte Carlo simulation, models with varying numbers of independent variables were examined and minimum sample sizes were determined for multiple scenarios at each number of independent variables. The scenarios arrive from varying the levels of correlations between the criterion variable and predictor variables as well as among predictor variables. Two minimum sample sizes were determined for each scenario, a good and an excellent prediction level. The relationship between the squared multiple correlation coefficients and minimum necessary sample sizes were examined. A definite relationship, similar to a negative exponential relationship, was found between the squared multiple correlation coefficient and the minimum sample size. As the squared multiple correlation coefficient decreased, the sample size increased at an increasing rate. This study provides guidelines for sample size needed for accurate predictions.</p>]]></description>
<dc:creator><![CDATA[Knofczynski, G. T., Mundfrom, D.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310131</dc:identifier>
<dc:title><![CDATA[Sample Sizes When Using Multiple Linear Regression for Prediction]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>442</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>431</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/443?rss=1">
<title><![CDATA[Self-Efficacy for Self-Regulated Learning: A Validation Study]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/443?rss=1</link>
<description><![CDATA[<p>The psychometric properties and multigroup measurement invariance of scores on the Self-Efficacy for Self-Regulated Learning Scale taken from Bandura's Children's Self-Efficacy Scale were assessed in a sample of 3,760 students from Grades 4 to 11. Latent means differences were also examined by gender and school level. Results reveal a unidimensional construct with equivalent factor pattern coefficients for boys and girls and for students in elementary, middle, and high school. Elementary school students report higher self-efficacy for self-regulated learning than do students in middle and high school. The latent factor is related to self-efficacy, self-concept, task goal orientation, apprehension, and achievement.</p>]]></description>
<dc:creator><![CDATA[Usher, E. L., Pajares, F.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308475</dc:identifier>
<dc:title><![CDATA[Self-Efficacy for Self-Regulated Learning: A Validation Study]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>463</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>443</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/464?rss=1">
<title><![CDATA[Performance and Mastery Orientation of High School and University/College Students: A Rasch Perspective]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/464?rss=1</link>
<description><![CDATA[<p>This investigation assesses performance and mastery orientation from a Rasch perspective among high school and university students and provides a complementary approach to the factor analytic methods typical in goal theory research. Data shows that both school and university students are high in mastery orientation relative to performance orientation, and there is broad agreement for the separation of performance and mastery orientation. However, there are fewer school&mdash;university differences on performance orientation than mastery orientation, with university students more mastery oriented than high school students. Although performance orientation holds up well from a psychometric perspective, data indicates that for both school and university samples, mastery orientation items do not adequately differentiate high&mdash; from low&mdash;mastery-oriented students. Finally, the Rasch approach suggests that for both school and university students, there may exist a hierarchical structure to performance and mastery orientation. Implications for goal theory and the measurement of goal orientations are discussed.</p>]]></description>
<dc:creator><![CDATA[Martin, A. J., Marsh, H. W., Debus, R. L., Malmberg, L.-E.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308478</dc:identifier>
<dc:title><![CDATA[Performance and Mastery Orientation of High School and University/College Students: A Rasch Perspective]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>487</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>464</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/488?rss=1">
<title><![CDATA[Development of a Five-Dimensional Measure of Adult Sleep Quality]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/488?rss=1</link>
<description><![CDATA[<p>This article describes the development of a measure of adult sleep quality: the Adult Sleep&mdash;Wake Scale (ADSWS). The ADSWS is a self-report pencil-and-paper measure of sleep quality consisting of five behavioral dimensions (Going to Bed, Falling Asleep, Maintaining Sleep, Reinitiating Sleep, and Returning to Wakefulness). Data were collected from three samples. Study 1 describes the derivation of an initial pool of items. Further scale refinement is described in Studies 2 and 3. Construct validity of scores on the ADSWS was examined via correlations between ADSWS scores and scores on (1) two personality variables (Negative Affectivity and Positive Affectivity); (2) three work-related stressors (Interpersonal Conflict, Work Demands, and Job Ambiguity); and (3) three strain outcomes (Depression, Health Complaints, and Frustration). In Study 3, data were collected from participants across three time periods to assess estimates of test&mdash;retest reliability and convergent and divergent validity using procedures described by Campbell and Fiske. The findings indicate that the psychometric properties of scores on the ADSWS exceed criteria for use of an instrument in research settings. In addition, across all three studies, scores on ADSWS subscales correlated, as hypothesized, with personality, stressor, and strain variables.</p>]]></description>
<dc:creator><![CDATA[Fortunato, V. J., LeBourgeois, M. K., Harsh, J.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308470</dc:identifier>
<dc:title><![CDATA[Development of a Five-Dimensional Measure of Adult Sleep Quality]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>514</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>488</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/3/515?rss=1">
<title><![CDATA[The Importance of Construct Breadth When Examining Interrole Conflict]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/3/515?rss=1</link>
<description><![CDATA[<p>Research examining the influence of nonwork issues on work-related outcomes has flourished. Often, however, the breadth of the interrole conflict construct varies widely between studies. To determine if the breadth of the interrole conflict measure makes a difference, the current study compares the criterion-related validity of scores yielded by a work&mdash;nonwork conflict scale and those yielded by a work&mdash;family conflict scale using active-duty U.S. Army soldiers stationed in Germany and Italy with spouses and/or children and without spouses or children. Results demonstrated that the two constructs are related but distinct. In addition, work&mdash;family conflict had a stronger relationship with job satisfaction and turnover intentions for employees with a spouse and/or children than for single, childless employees, whereas work&mdash;nonwork conflict had a stronger relationship with these outcomes for single, childless employees than for employees with a spouse and/or children.</p>]]></description>
<dc:creator><![CDATA[Huffman, A. H., Youngcourt, S. S., Payne, S. C., Castro, C. A.]]></dc:creator>
<dc:date>2008-05-06</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308472</dc:identifier>
<dc:title><![CDATA[The Importance of Construct Breadth When Examining Interrole Conflict]]></dc:title>
<prism:number>3</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>530</prism:endingPage>
<prism:publicationDate>2008-06-01</prism:publicationDate>
<prism:startingPage>515</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/181?rss=1">
<title><![CDATA[Evaluating Item Fit for Multidimensional Item Response Models]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/181?rss=1</link>
<description><![CDATA[<p>This research examines the utility of the <I>S-X</I><sup>2</sup> statistic proposed by Orlando and Thissen (2000) in evaluating item fit for multidimensional item response models. Monte Carlo simulation was conducted to investigate both the Type I error and statistical power of this fit statistic in analyzing two kinds of multidimensional test structures: approximate simple structure and complex structure. Overall, results show that this statistic is capable of evaluating item fit in the application of multidimensional item response models. It is important to identify the structure of multidimensional tests before this fit statistic is applied. For tests with an approximate simple structure, the sampling distribution can be approximated by a standard chi-square distribution. But for tests with a complex structure, that approximation is more complicated. As regards power in detecting the model nonfitting items, the performance of this statistic in multidimensional tests is comparable to that in unidimensional tests.</p>]]></description>
<dc:creator><![CDATA[Zhang, B., Stone, C. A.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301547</dc:identifier>
<dc:title><![CDATA[Evaluating Item Fit for Multidimensional Item Response Models]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>196</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>181</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/197?rss=1">
<title><![CDATA[A Modification to Angoff and Bookmarking Cut Scores to Account for the Imperfect Reliability of Test Scores]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/197?rss=1</link>
<description><![CDATA[<p>It is shown that the Angoff and bookmarking cut scores are examples of true score equating that in the real world must be applied to observed scores. In the context of defining minimal competency, the percentage &ldquo;failed&rdquo; by such methods is a function of the length of the measuring instrument. It is argued that this length is largely arbitrary, being heavily influenced by practical educational constraints. Hence, there is an ambiguity or nonuniqueness about the percentage failed. An argument is advanced that the failure rate should reflect the percentage of true scores below the cut score. A modification to the cut score is derived that achieves this outcome and simultaneously removes the nonuniqueness in the percentage failed.</p>]]></description>
<dc:creator><![CDATA[MacCann, R. G.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407305584</dc:identifier>
<dc:title><![CDATA[A Modification to Angoff and Bookmarking Cut Scores to Account for the Imperfect Reliability of Test Scores]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>214</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>197</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/215?rss=1">
<title><![CDATA[A Strategy for Controlling Item Exposure in Multidimensional Computerized Adaptive Testing]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/215?rss=1</link>
<description><![CDATA[<p>Although computerized adaptive tests have enjoyed tremendous growth, solutions for important problems remain unavailable. One problem is the control of item exposure rate. Because adaptive algorithms are designed to select optimal items, they choose items with high discriminating power. Thus, these items are selected more often than others, leading to both overexposure and underutilization of some parts of the item pool. Overused items are often compromised, creating a security problem that could threaten the validity of a test. Building on a previously proposed stratification scheme to control the exposure rate for one-dimensional tests, the authors extend their method to multidimensional tests. A strategy is proposed based on stratification in accordance with a functional of the vector of the discrimination parameter, which can be implemented with minimal computational overhead. Both theoretical and empirical validation studies are provided. Empirical results indicate significant improvement over the commonly used method of controlling exposure rate that requires only a reasonable sacrifice in efficiency.</p>]]></description>
<dc:creator><![CDATA[Lee, Y.-H., Ip, E. H., Fuh, C.-D.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407307007</dc:identifier>
<dc:title><![CDATA[A Strategy for Controlling Item Exposure in Multidimensional Computerized Adaptive Testing]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>232</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>215</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/233?rss=1">
<title><![CDATA[Population Validity and Cross-Validity: Applications of Distribution Theory for Testing Hypotheses, Setting Confidence Intervals, and Determining Sample Size]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/233?rss=1</link>
<description><![CDATA[<p>Applications of distribution theory for the squared multiple correlation coefficient and the squared cross-validation coefficient are reviewed, and computer programs for these applications are made available. The applications include confidence intervals, hypothesis testing, and sample size selection.</p>]]></description>
<dc:creator><![CDATA[Algina, J., Keselman, H. J.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407305589</dc:identifier>
<dc:title><![CDATA[Population Validity and Cross-Validity: Applications of Distribution Theory for Testing Hypotheses, Setting Confidence Intervals, and Determining Sample Size]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>244</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>233</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/245?rss=1">
<title><![CDATA[Assessing General and Specific Attitudes in Human Learning Behavior: An Activity Perspective and a Multilevel Modeling Approach]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/245?rss=1</link>
<description><![CDATA[<p>This article proposes a multilevel modeling approach to study the general and specific attitudes formed in human learning behavior. Based on the premises of activity theory, it conceptualizes the unit of analysis for attitude measurement as a scalable and evolving activity system rather than a single action. Measurement issues related to this conceptualization, including scale development and validation, are discussed with the help of facet analysis and multilevel structural equation modeling techniques. An empirical study was conducted, and the results indicate that this approach is theoretically and methodologically defensible.</p>]]></description>
<dc:creator><![CDATA[Sun, J., Willson, V. L.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308510</dc:identifier>
<dc:title><![CDATA[Assessing General and Specific Attitudes in Human Learning Behavior: An Activity Perspective and a Multilevel Modeling Approach]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>261</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>245</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/262?rss=1">
<title><![CDATA[The Ways of Coping Scale: A Reliability Generalization Study]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/262?rss=1</link>
<description><![CDATA[<p>For more than 20 years, the Ways of Coping Scale (WOCS) has been used extensively to measure coping. Yet beyond the original psychometric data, few studies have reexamined its properties utilizing the enormous body of research generated on the WOCS. Reliability has been assumed to be consistent as an attribute of the test. This study used reliability generalization to identify (a) the variability in reliability estimates for the WOCS scores across studies, (b) the typical score reliability for the WOCS, and (c) the salient features across studies that relate to the variability in reliability estimate scores for the WOCS. Typical reliability across subscale scores ranged from .60 to .75 with Positive Reappraisal showing the least variability and Self-Controlling showing the most. Factors related to this variability were age and format of administration.</p>]]></description>
<dc:creator><![CDATA[Rexrode, K. R., Petersen, S., O'Toole, S.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407310128</dc:identifier>
<dc:title><![CDATA[The Ways of Coping Scale: A Reliability Generalization Study]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>280</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>262</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/281?rss=1">
<title><![CDATA[Measurement Bias Across Gender on the Children's Depression Inventory: Evidence for Invariance From Two Latent Variable Models]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/281?rss=1</link>
<description><![CDATA[<p>Confirmatory factor analysis for ordered-categorical measures (CFA-OCM) and rating scale item response theory (IRT) analyses explore measurement bias across gender on the Children's Depression Inventory (CDI) in a community sample of 779 children in the third and sixth grades. Given the set of statistical criteria, IRT and CFA-OCM generally establish measurement equivalence. Results substantiate both Craighead et al.'s five-factor model and IRT models with the CDI, demonstrate their convergence regarding bias, support the use of the CDI in cross-gender comparisons, suggest a separate scoring method need not be developed for children in this age range, and provide evidence that previously noted developmental similarities in depression reflect true similarities. Given measurement invariance, observed score analyses demonstrate no statistically significant differences between boys and girls on the CDI total score and four scores created as a function of the factor model. However, girls endorse statistically significant elevated levels on a dysphoria score.</p>]]></description>
<dc:creator><![CDATA[Carle, A. C., Millsap, R. E., Cole, D. A.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308471</dc:identifier>
<dc:title><![CDATA[Measurement Bias Across Gender on the Children's Depression Inventory: Evidence for Invariance From Two Latent Variable Models]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>303</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>281</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/304?rss=1">
<title><![CDATA[Validation of Scores on the Homework Management Scale for High School Students]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/304?rss=1</link>
<description><![CDATA[<p>The purpose of this study is to test the validity of scores on the Homework Management Scale (HMS) using 681 rural and 306 urban high school students. Based on a randomized split of the rural sample, the author first conducts exploratory factor analysis on Group 1 (n = 341) and confirmatory factor analysis on Group 2 (n = 340). The results reveal that the HMS comprises five separate yet related factors: arranging environment, managing time, handling distraction, monitoring motivation, and controlling emotion. This factor structure is cross-validated with the data from the urban sample (Group 3). The study finds an adequate level of configural, factor loading, common error covariance, and intercept invariance between Group 2 and Group 3.</p>]]></description>
<dc:creator><![CDATA[Xu, J.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301531</dc:identifier>
<dc:title><![CDATA[Validation of Scores on the Homework Management Scale for High School Students]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>324</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>304</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/325?rss=1">
<title><![CDATA[Testing for Multigroup Invariance of the Computer Anxiety Scale]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/325?rss=1</link>
<description><![CDATA[<p>The Computer Anxiety Scale (CAS) measures the perceptions of individuals with respect to their anxiety toward computers. Although the CAS was developed a number of years ago, research has shown that its factor structure has remained stable. Recent cross-cultural studies using samples of college students from various countries have also shown that the construct of computer anxiety as measured by the CAS has not changed. The present study uses structural equation modeling techniques to examine whether the CAS remains stable in two samples of German college students, the first from a traditional attendance-based lecture course and the second using Web-based tools. Results provide support for the invariance of the factor structure of the CAS.</p>]]></description>
<dc:creator><![CDATA[Marcoulides, G. A., Emrich, C., Marcoulides, L. D.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308469</dc:identifier>
<dc:title><![CDATA[Testing for Multigroup Invariance of the Computer Anxiety Scale]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>334</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>325</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/2/335?rss=1">
<title><![CDATA[Latent Class Analysis of Differential Item Functioning on the Peabody Picture Vocabulary Test-III]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/2/335?rss=1</link>
<description><![CDATA[<p>This study investigated the use of latent class analysis for the detection of differences in item functioning on the Peabody Picture Vocabulary Test&mdash;Third Edition (PPVT-III). A two-class solution for a latent class model appeared to be defined in part by ability because Class 1 was lower in ability than Class 2 on both the PPVT-III and the Expressive Vocabulary Test (EVT). This difference was much larger on the PPVT-III for the low-ability class. No difference was found for the high-ability latent class, suggesting that the difference was a result of something in the PPVT-III that was not present in the EVT. The difference on the PPVT-III for low-ability children appeared to be a result of the structure of the items on the test, which seems to encourage the overriding use of the novel name&mdash;nameless category strategy for the selection of an answer&mdash;a strategy not available on the EVT.</p>]]></description>
<dc:creator><![CDATA[Lee Webb, M.-y., Cohen, A. S., Schwanenflugel, P. J.]]></dc:creator>
<dc:date>2008-03-25</dc:date>
<dc:identifier>info:doi/10.1177/0013164407308474</dc:identifier>
<dc:title><![CDATA[Latent Class Analysis of Differential Item Functioning on the Peabody Picture Vocabulary Test-III]]></dc:title>
<prism:number>2</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>351</prism:endingPage>
<prism:publicationDate>2008-04-01</prism:publicationDate>
<prism:startingPage>335</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/5?rss=1">
<title><![CDATA[Comparability of Computer-Based and Paper-and-Pencil Testing in K-12 Reading Assessments: A Meta-Analysis of Testing Mode Effects]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/5?rss=1</link>
<description><![CDATA[<p>In recent years, computer-based testing (CBT) has grown in popularity, is increasingly being implemented across the United States, and will likely become the primary mode for delivering tests in the future. Although CBT offers many advantages over traditional paper-and-pencil testing, assessment experts, researchers, practitioners, and users have expressed concern about the comparability of scores between the two test administration modes. To help provide an answer to this issue, a meta-analysis was conducted to synthesize the administration mode effects of CBTs and paper-and-pencil tests on K&mdash;12 student reading assessments. Findings indicate that the administration mode had no statistically significant effect on K&mdash;12 student reading achievement scores. Four moderator variables&mdash;study design, sample size, computer delivery algorithm, and computer practice&mdash;made statistically significant contributions to predicting effect size. Three moderator variables&mdash;grade level, type of test, and computer delivery method&mdash;did not affect the differences in reading scores between test modes.</p>]]></description>
<dc:creator><![CDATA[Wang, S., Jiao, H., Young, M. J., Brooks, T., Olson, J.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407305592</dc:identifier>
<dc:title><![CDATA[Comparability of Computer-Based and Paper-and-Pencil Testing in K-12 Reading Assessments: A Meta-Analysis of Testing Mode Effects]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>24</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>5</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/25?rss=1">
<title><![CDATA[Estimating Standard Errors of Cut Scores for Item Rating and Mapmark Procedures: A Generalizability Theory Approach]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/25?rss=1</link>
<description><![CDATA[<p>Standard-setting methods are widely used to determine cut scores on a test that examinees must meet for a certain performance standard. Because standard setting is a measurement procedure, it is important to evaluate variability of cut scores resulting from the standard-setting process. Generalizability theory is used in this study to estimate standard errors of cut scores resulting from two standard-setting methods: item rating (Angoff-based) and mapmark (bookmark-based) methods. In this study, two different generalizability (G) study designs and four different decision (D) study designs were examined, and the impact of varying different aspects of the study design and universes of generalization was examined. Results suggest that cut scores were generally consistent for both methods. The first round standard setting contributed the most to the overall variability for the mapmark method. Also, it is clear that there is no one standard error associated with a certain cut score.</p>]]></description>
<dc:creator><![CDATA[Yin, P., Sconing, J.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301546</dc:identifier>
<dc:title><![CDATA[Estimating Standard Errors of Cut Scores for Item Rating and Mapmark Procedures: A Generalizability Theory Approach]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>41</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>25</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/42?rss=1">
<title><![CDATA[requivalent, Meta-Analysis, and Robustness: An Empirical Examination of Rosenthal and Rubin's Effect Size Indicator]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/42?rss=1</link>
<description><![CDATA[<p>Rosenthal and Rubin introduced a general effect size index, r<SUB>equivalent</SUB>, for use in meta-analyses of two-group experiments; it employs p values from reports of the original studies to determine an equivalent t test and the corresponding point-biserial correlation coefficient. The present investigation used Monte Carlo&mdash;simulated meta-analyses to examine the impact on r<SUB>equivalent</SUB> effect sizes of research using independent-groups, pooled-variance t tests with that using a less powerful median test. As expected, estimates based on t were higher. These differences were consistent even in the presence of strong variance heterogeneity when data were distributed normally, but not when data were nonnormal. The results suggested that the use of r<SUB>equivalent</SUB> be confined to combining studies using inferential tests with comparable power and robustness; they also cast doubt on the use of r<SUB>equivalent</SUB> when data are not distributed normally.</p>]]></description>
<dc:creator><![CDATA[Gilpin, A. R.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301542</dc:identifier>
<dc:title><![CDATA[requivalent, Meta-Analysis, and Robustness: An Empirical Examination of Rosenthal and Rubin's Effect Size Indicator]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>57</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>42</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/58?rss=1">
<title><![CDATA[Comparison of Two Approaches for Handling Missing Covariates in Logistic Regression]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/58?rss=1</link>
<description><![CDATA[<p>For the past 25 years, methodological advances have been made in missing data treatment. Most published work has focused on missing data in dependent variables under various conditions. The present study seeks to fill the void by comparing two approaches for handling missing data in categorical covariates in logistic regression: the expectation-maximization (EM) method of weights and multiple imputation (MI). Sample data are drawn randomly from a population with known characteristics. Missing data on covariates are simulated under two conditions: missing completely at random and missing at random with different missing rates. A logistic regression model was fit to each sample using either the EM or MI approach. The performance of these two approaches is compared on four criteria: bias, efficiency, coverage, and rejection rate. Results generally favored MI over EM. Practical issues such as implementation, inclusion of continuous covariates, and interactions between covariates are discussed.</p>]]></description>
<dc:creator><![CDATA[Peng, C.-Y. J., Zhu, J.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407305582</dc:identifier>
<dc:title><![CDATA[Comparison of Two Approaches for Handling Missing Covariates in Logistic Regression]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>77</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>58</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 78-96 (Q-matrix misspecification in the DINA model).
     The description is an HTML fragment inside CDATA, so quotation marks are written as
     plain ASCII double quotes, which are representable in the declared ISO-8859-1 encoding. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/78?rss=1">
<title><![CDATA[The Effects of Q-Matrix Misspecification on Parameter Estimates and Classification Accuracy in the DINA Model]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/78?rss=1</link>
<description><![CDATA[<p>This article reports a study that investigated the effects of Q-matrix misspecifications on parameter estimates and misclassification rates for the deterministic-input, noisy "and" gate (DINA) model, which is a restricted latent class model for multiple classifications of respondents that can be useful for cognitively motivated diagnostic assessment. In this study, a Q-matrix for an assessment mapping all 15 possible attribute patterns based on four independent attributes was misspecified by changing one "0" or "1" for each item. This was done in a way that ensured that certain attribute combinations were completely deleted from the Q-matrix, and certain incorrect dependency relationships between attributes were represented. Results showed clear effects that included an item-specific overestimation of slipping parameters when attributes were deleted from the Q-matrix, an item-specific overestimation of guessing parameters when attributes were added to the Q-matrix, and high misclassification rates for attribute classes that contained attribute combinations that were deleted from the Q-matrix.</p>]]></description>
<dc:creator><![CDATA[Rupp, A. A., Templin, J.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301545</dc:identifier>
<dc:title><![CDATA[The Effects of Q-Matrix Misspecification on Parameter Estimates and Classification Accuracy in the DINA Model]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>96</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>78</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 97-119 (reliability generalization of locus of
     control scales). rdf:about and link hold the same abstract URL; the description is an
     HTML fragment inside CDATA; dc:title repeats the title text. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/97?rss=1">
<title><![CDATA[A Reliability Generalization Study of Scores on Rotter's and Nowicki-Strickland's Locus of Control Scales]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/97?rss=1</link>
<description><![CDATA[<p>The most commonly used measures of locus of control are Rotter's Internality-Externality Scale (I-E) and Nowicki and Strickland's Internality-Externality Scale (NSIE). A reliability generalization study is conducted to explore variability in I-E and NSIE score reliability. Studies are coded for aspects of the scales used (number of response points, number of items) and for sample demographic descriptors (percentage female, average age). Results indicate no statistically significant difference in the predicted internal consistency estimate for I-E Scale versus NSIE Scale scores. Only the percentage female variable is found to predict variation in internal consistency estimates. Testing interval length explains variability in test-retest coefficient estimates. Results and directions for future research are discussed.</p>]]></description>
<dc:creator><![CDATA[Beretvas, S. N., Suizzo, M.-A., Durham, J. A., Yarnell, L. M.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301529</dc:identifier>
<dc:title><![CDATA[A Reliability Generalization Study of Scores on Rotter's and Nowicki-Strickland's Locus of Control Scales]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>119</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>97</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 120-128 (Fail-Safe N for reliability generalization).
     The description is an HTML fragment inside CDATA, so quotation marks are written as
     plain ASCII double quotes, which are representable in the declared ISO-8859-1 encoding. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/120?rss=1">
<title><![CDATA[The File Drawer Problem in Reliability Generalization: A Strategy to Compute a Fail-Safe N With Reliability Coefficients]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/120?rss=1</link>
<description><![CDATA[<p>Meta-analytic reliability generalizations (RGs) are limited by the scarcity of reliability reporting in primary articles, and currently, RG investigators lack a method to quantify the impact of such nonreporting. This article introduces a stepwise procedure to address this challenge. First, the authors introduce a formula that allows researchers to estimate the lower bound population average reliability for a desired instrument. Second, they present an equation to determine the Fail-Safe N for RG. This equation estimates the number of "file drawer" studies required to drop the aggregate score reliability of an instrument below a specified criterion value. Finally, the authors demonstrate the utility of these equations using published RG studies. Comments on the conclusions drawn from each RG application are provided.</p>]]></description>
<dc:creator><![CDATA[Howell, R. T., Shields, A. L.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301528</dc:identifier>
<dc:title><![CDATA[The File Drawer Problem in Reliability Generalization: A Strategy to Compute a Fail-Safe N With Reliability Coefficients]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>128</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>120</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 129-138 (validity of GMAT scores, 1997 to 2004).
     rdf:about and link hold the same abstract URL; the description is an HTML fragment
     inside CDATA; dc:title repeats the title text. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/129?rss=1">
<title><![CDATA[The Validity of Graduate Management Admission Test Scores: A Summary of Studies Conducted From 1997 to 2004]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/129?rss=1</link>
<description><![CDATA[<p>The validity of Graduate Management Admission Test (GMAT) scores is examined by summarizing 273 studies conducted between 1997 and 2004. Each of the studies was conducted through the Validity Study Service of the test sponsor and contained identical variables and statistical methods. Validity coefficients from each of the studies were corrected for restriction of range on the predictors. The interquartile range of the validity coefficients for the combination of GMAT scores and undergraduate grades in predicting early graduate school performance was .45 to .63. Further examination of study findings showed that higher validity coefficients were observed for executive MBA programs compared to either full-time or part-time programs. There appeared to be no measurable differences in results for public versus private institutions. For programs not located at U.S. institutions, quantitative scores showed lower average validity coefficients compared to programs located in the United States.</p>]]></description>
<dc:creator><![CDATA[Talento-Miller, E., Rudner, L. M.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407305581</dc:identifier>
<dc:title><![CDATA[The Validity of Graduate Management Admission Test Scores: A Summary of Studies Conducted From 1997 to 2004]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>138</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>129</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 139-153 (WAIS-III index score short forms).
     The instrument name contains an em dash, which ISO-8859-1 cannot encode. In title and
     dc:title it is written as the character reference &#8212;, so those two elements are
     plain text rather than CDATA (references are not expanded inside CDATA). The description
     is an HTML fragment inside CDATA and uses the HTML entity for the same dash. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/139?rss=1">
<title>Concurrent Validity of Wechsler Adult Intelligence Scales&#8212;Third Edition Index Score Short Forms in the Canadian Standardization Sample</title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/139?rss=1</link>
<description><![CDATA[<p>This study evaluated the concurrent validity of estimated Wechsler Adult Intelligence Scales&mdash;Third Edition (WAIS-III) index scores using various one- and two-subtest combinations. Participants were the Canadian WAIS-III standardization sample. Using all possible one- and two-subtest combinations, an estimated Verbal Comprehension Index (VCI), an estimated Perceptual Organization Index (POI), and an estimated Working Memory Index (WMI) were generated by prorating relevant subtest scores. As expected, two-subtest short forms were consistently more accurate than one-subtest short forms. Agreement between short-form and full-form index scores was high for two-subtest combinations (range = 88% to 96%) but only moderate with one subtest (range = 62% to 79%). Accuracy did not vary by age, ethnicity, gender, or education. However, accuracy was lowest for index scores in the high average to very superior range. These results suggest that although some two-subtest short forms are useful for estimating VCI, POI, and WMI scores, one-subtest short forms should not be used for this purpose.</p>]]></description>
<dc:creator><![CDATA[Lange, R. T., Iverson, G. L.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164407301530</dc:identifier>
<dc:title>Concurrent Validity of Wechsler Adult Intelligence Scales&#8212;Third Edition Index Score Short Forms in the Canadian Standardization Sample</dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>153</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>139</prism:startingPage>
<prism:section>Article</prism:section>
</item>

<!-- Feed entry for vol. 68, no. 1, pp. 154-173 (self-deception and motivational
     dispositions). rdf:about and link hold the same abstract URL; the description is an
     HTML fragment inside CDATA; dc:title repeats the title text. -->
<item rdf:about="http://epm.sagepub.com/cgi/content/abstract/68/1/154?rss=1">
<title><![CDATA[Implications of Self-Deception for Self-Reported Intrinsic and Extrinsic Motivational Dispositions and Actual Learning Performance: A Higher Order Structural Model]]></title>
<link>http://epm.sagepub.com/cgi/content/abstract/68/1/154?rss=1</link>
<description><![CDATA[<p>The authors explored implications of individuals' self-deception (a trait) for their self-reported intrinsic and extrinsic motivational dispositions and their actual learning performance. In doing so, a higher order structural model was developed and tested in which intrinsic and extrinsic motivational dispositions were underlying factors that were each manifested in four distinct propensities that were measured. The authors also tested whether controlling for self-deception influenced predictive relationships. Analyses of data from 429 college students supported the validity of the higher order model and indicated that self-deception was positively related to intrinsic and negatively related to extrinsic motivational dispositions. Self-deception was negatively related, whereas intrinsic and extrinsic motivational dispositions were positively related, to learning performance. Removing the influences of self-deception altered some of the predictive relationships.</p>]]></description>
<dc:creator><![CDATA[Hirschfeld, R. R., Thomas, C. H., McNatt, D. B.]]></dc:creator>
<dc:date>2008-01-03</dc:date>
<dc:identifier>info:doi/10.1177/0013164406299129</dc:identifier>
<dc:title><![CDATA[Implications of Self-Deception for Self-Reported Intrinsic and Extrinsic Motivational Dispositions and Actual Learning Performance: A Higher Order Structural Model]]></dc:title>
<prism:number>1</prism:number>
<prism:volume>68</prism:volume>
<prism:endingPage>173</prism:endingPage>
<prism:publicationDate>2008-02-01</prism:publicationDate>
<prism:startingPage>154</prism:startingPage>
<prism:section>Article</prism:section>
</item>

</rdf:RDF>